From 5cf8276a8dfee33c9145b76093d181cceafd3832 Mon Sep 17 00:00:00 2001
From: Alice Bertini
Date: Tue, 12 Jun 2018 16:22:02 -0600
Subject: [PATCH 01/22] update for new geyser cent74 OS - invoked from cheyenne
 with "execgy -r cent74" in preparation for the geyser upgrade during the week
 of June 19, 2018. Also invoking the new ncar_pylib virtualenv in place of the
 cesm-env2 virtualenv and then loading the CESM_postprocessing packages into
 the new NPL virtualenv

---
 Machines/geyser_modules | 34 ++++++++++++----------------------
 create_python_env       | 13 ++++++++-----
 2 files changed, 20 insertions(+), 27 deletions(-)

diff --git a/Machines/geyser_modules b/Machines/geyser_modules
index 64cbd7ba..b32ab6fa 100755
--- a/Machines/geyser_modules
+++ b/Machines/geyser_modules
@@ -1,32 +1,22 @@
-#!/bin/sh
+#!/bin/bash -l
+. /etc/bash.bashrc
 
 echo "Python boot-strap modules for machine geyser"
 
-. /glade/apps/opt/lmod/lmod/init/bash
-module load python/2.7.7
-module load intel/16.0.3
+module load python/2.7.14
+module load intel/17.0.1
 module load ncarenv
-module load ncarbinlibs
 module load ncarcompilers
-module load slurm/17
-module load impi/5.1.1.109
-module load numpy/1.11.0
-module load scipy/0.18.1
-module load mpi4py/2.0.0-impi
-module load pynio/1.4.1
-module load pyside/1.1.2
-module load matplotlib/1.5.1
-module load netcdf/4.3.0
-module load nco/4.4.4
+module load impi
+module load netcdf/4.6.1
+module load nco/4.7.4
 module load ncl/6.4.0
-module load netcdf4python/1.2.4
-module load pyngl/1.4.0
+# still need pynio and pyngl installed in the geyser virtualenv
+# Brian V. will work to install
 
-# prepend the virtualenv into the PATH
-PATH=/glade/apps/contrib/virtualenv/12.0.7:${PATH}
-export PATH
-
-# may need basemap for ocn ecosys
+# clone the geyser virtualenv first with helper script ncar_pylib
+# use "ncar_pylib --help" to see all options
+ncar_pylib -c 20180510 ${pp_dir}/cesm-env2
 
 module list
 
diff --git a/create_python_env b/create_python_env
index 6a8f306e..34cc441d 100755
--- a/create_python_env
+++ b/create_python_env
@@ -58,6 +58,7 @@ progname=`basename $0`
 # need absolute path (rather than relative path) because we use this
 # path to get to the machines directory
 pp_dir=$(absolute_path `dirname $0`)
+export pp_dir
 
 #----------------------------------------------------------------------
 # Set default return values
@@ -136,7 +137,7 @@ fi
 #----------------------------------------------------------------------
 env="${pp_dir}/cesm-env2"
 echo $env
-if [ -f $env ]; then
+if [ -f $env ] && [ ${machine} != geyser ]; then
     status="ERROR"
     info="$progname - ${pp_dir}/cesm-env2 virtual environment already exists.
 It is only necessary to create the virtual environment once for a given machine.
@@ -162,10 +163,12 @@ cd $pp_dir
 # already setup, so only done once per case.
 #----------------------------------------------------------------------
 echo "$progname - making virtual environment in ${pp_dir}/cesm-env2."
-make env
-if [ $? -ne 0 ]; then
-    echo "ERROR: Unable to create virtual environment in ${pp_dir}/cesm-env2. Exiting..."
-    exit 1
+if [ ${machine} != geyser ]; then
+  make env
+  if [ $? -ne 0 ]; then
+    echo "ERROR: Unable to create virtual environment in ${pp_dir}/cesm-env2. Exiting..."
+    exit 1
+  fi
 fi
 
 #----------------------------------------------------------------------

From 19763ca4d258f4ad5a4bb29fcc6150f12608f5f5 Mon Sep 17 00:00:00 2001
From: Alice Bertini
Date: Mon, 2 Jul 2018 17:19:10 -0600
Subject: [PATCH 02/22] update for geyser ncar_pylib virtualenv integration

---
 Templates/batch_cheyenne.tmpl | 2 ++
 Templates/batch_geyser.tmpl   | 2 ++
 Templates/postprocess.tmpl    | 7 +++----
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/Templates/batch_cheyenne.tmpl b/Templates/batch_cheyenne.tmpl
index 4d1d8041..6168cd6e 100644
--- a/Templates/batch_cheyenne.tmpl
+++ b/Templates/batch_cheyenne.tmpl
@@ -1,3 +1,5 @@
+#! /usr/bin/env bash
+
 #PBS -N {{ processName }}
 #PBS -q {{ queue }}
 #PBS -l select={{ nodes }}:ncpus={{ ppn }}:mpiprocs={{ ppn }}
diff --git a/Templates/batch_geyser.tmpl b/Templates/batch_geyser.tmpl
index 4f0b7287..feb0d5fe 100644
--- a/Templates/batch_geyser.tmpl
+++ b/Templates/batch_geyser.tmpl
@@ -1,3 +1,5 @@
+#! /bin/bash -l
+
 #SBATCH -n {{ pes }}
 #SBATCH -N {{ nodes }}
 #SBATCH --ntasks-per-node={{ ppn }}
diff --git a/Templates/postprocess.tmpl b/Templates/postprocess.tmpl
index f7b5caa1..3f516a62 100644
--- a/Templates/postprocess.tmpl
+++ b/Templates/postprocess.tmpl
@@ -1,4 +1,5 @@
-#! /usr/bin/env bash
+{{ batchdirectives }}
+
 ##########
 ##
 ## See https://github.com/NCAR/CESM_postprocessing/wiki for details
@@ -6,14 +7,12 @@
 ##
 ##########
 
-{{ batchdirectives }}
-
 if [ ! -e {{ virtualEnvDir }} ]; then
     echo "*************************************************************************************"
     echo "CESM {{ processName }} exiting due to non-existant python virtual environment in"
     echo "    {{ virtualEnvDir }}"
     echo "You must first run:"
-    echo "$SRCROOT/postprocessing/create_python_env.sh -machine [machine]"
+    echo "$POSTPROCESS_PATH/create_python_env.sh -machine [machine]"
     echo "*************************************************************************************"
     exit
 fi

From 79b340588949bd92124fd3438f054b9dbf1f9990 Mon Sep 17 00:00:00 2001
From: Alice Bertini
Date: Tue, 3 Jul 2018 14:21:01 -0600
Subject: [PATCH 03/22] update PATH to include udunits2.h

---
 Machines/cheyenne_modules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Machines/cheyenne_modules b/Machines/cheyenne_modules
index ebf22df0..28797930 100755
--- a/Machines/cheyenne_modules
+++ b/Machines/cheyenne_modules
@@ -24,6 +24,6 @@ module load cf_units/1.1.3
 module load pyngl/1.5.0b
 
 # prepend the virtualenv into the PATH
-PATH=/glade/u/apps/ch/opt/python/2.7.13/gnu/6.2.0/bin:${PATH}
+PATH=/glade/u/apps/ch/opt/udunits/2.2.26/gnu/6.3.0/include:/glade/u/apps/ch/opt/python/2.7.13/gnu/6.2.0/bin:${PATH}
 export PATH
 

From 48aaafdccb98ed9bd71ef445eba584ffc58ac7d6 Mon Sep 17 00:00:00 2001
From: Alice Bertini
Date: Tue, 3 Jul 2018 14:21:27 -0600
Subject: [PATCH 04/22] Squashed 'ilamb/ilamb/' changes from f46c647..5f35ec3

5f35ec3 cf_units requires more recent numpy but does not have it in their requirements
4bf570e 2.3 release
f1ce475 doc cleanup from cfunits move
3c88a3a move from cfunits to cf_units
515cfb1 added initial logic which excludes outliers when computing functional relationships
eed6fb0 fixed a bug which would cause a crash if non-string global attributes were found in the netCDF files.
Thanks to Declan Valters for the report 9197e39 added plot limits to the summary images 9af8e66 added a derived expression for twsa and required code support 919c5d6 updated the gold scores, fixed an error in handling the bias score in biomass, large differences possible 69d7d85 force a mask 0ab4b6c fixed some unit issues 70d6042 moved where some scalars are saved in the analysis 3348b64 neglected to put writes where we can be sure datasets are defined f59b823 restored site analysis, needs revamped 2c3136a finished memory refactor on the spatial analysis 78ce3e5 removed a duplicated function definition 0fb6a7f first pass at memory optimzation 40fa2e7 added a pass functionality to unit conversions 3968d67 added a nbytes routine to determine the size of Variables b152fd9 rework of sympifywithargs to reduce memory usage a9cefa0 changes to scores based on added iav 922d9ac removed numpy restriction cb7f921 removed /= because it is causing problems with numpy 1.14 c454c2d restored iav to the analysis b82ffa0 Merge branch 'ncollier/grace-change' 3673d67 changes in gold score to reflect methodology change 2420b0c change of directory name to reflect a last name and not a acronym 0a7636c added an attempt to purge C for carbon in units 23fdf42 added site plots to runoff and fixed twsa/runoff plots to show by basin in the all models tab acbdaca small fix to location of a dataset 49d8eab added a permafrost extent comparison 67c86b4 added a snow water equivalent dataset d110066 Merge branch 'master' of bitbucket.org:ncollier/ilamb 75a417a numpy 1.14.0 was found to give an error when using the /= operator on masked arrays. For now limiting the upper limit on the version f6c7692 Relaxed a requirement that the reference and comparison datasets must be uniformly spatial. This allows us to use a spatial observational dataset to compare against models run at sites. 
d81f144 added a reference explaining why we have changed the methodology 7a2e751 changes to grace methodology based on Swenson's suggestions f688075 missed a barrier which can be problematic when trying to render composite plots and compute relationships 3fae4d8 wrong import d58c5e4 changes in gold scores due to method change 4997958 rework and abstraction of relationship analysis, fixed warnings in plot generation e5d50a4 added more horizontal width per variable label length d19c523 added a --rel_only option to ilamb-run that will render summary plots without absolute scores 2968f2e safety if no relationships exist e4857ed fixed relationship plot ce53afd first pass at a tool for extracting scalar scores from files in csv format 4aa220d shifts in scoring of bias and rmse 3648557 conditional if no relationships exist git-subtree-dir: ilamb/ilamb git-subtree-split: 5f35ec3e07f7ea8441429ea0c3e864d1aa8340e1 --- README.rst | 41 ++- bin/ilamb-run | 9 +- bin/ilamb-table | 65 ++++ demo/ilamb.cfg | 26 ++ doc/install.rst | 4 +- setup.py | 10 +- src/ILAMB/ConfPermafrost.py | 223 +++++++++++++ src/ILAMB/ConfRunoff.py | 44 ++- src/ILAMB/ConfTWSA.py | 243 ++++++++------ src/ILAMB/Confrontation.py | 491 +++++++++++++++------------- src/ILAMB/ModelResult.py | 1 - src/ILAMB/Post.py | 28 +- src/ILAMB/Scoreboard.py | 130 ++------ src/ILAMB/Variable.py | 101 +++--- src/ILAMB/__init__.py | 6 +- src/ILAMB/constants.py | 22 ++ src/ILAMB/ilamblib.py | 633 ++++++++++++++++++++++++------------ test/scores_test.csv.gold | 16 +- 18 files changed, 1394 insertions(+), 699 deletions(-) create mode 100644 bin/ilamb-table create mode 100644 src/ILAMB/ConfPermafrost.py diff --git a/README.rst b/README.rst index 86120b54..ebbe75ee 100644 --- a/README.rst +++ b/README.rst @@ -31,27 +31,38 @@ Useful Information * `CLM `_ - land comparison against 3 CLM versions and 2 forcings * `CMIP5 `_ - land comparison against a collection of CMIP5 models * `IOMB `_ - ocean comparison against a few ocean models - + +* Paper `preprint `_ which + details the design and methodology employed in the ILAMB package * If you find the package or the ouput helpful in your research or development efforts, we kindly ask you to cite the following reference (DOI:10.18139/ILAMB.v002.00/1251621). -ILAMB 2.2 Release +ILAMB 2.3 Release ----------------- -We are pleased to announce version 2.2 of the ILAMB python package. Among many small bugfixes and enhancements, the new version contains the following new features: - -* A new installed command ``ilamb-fetch`` has been included which can be run to automatically download the observational datasets. Running this command after the data has been downloaded will check your collection for updates and consistency. -* A new installed command ``ilamb-doctor`` has been included which can be run with options similar to ``ilamb-run`` to help identify which values a particular configure file needs in order to run. -* ILAMB will now check the spatial extents of all the models present in the current run and clip away to the largest shared extent. This allows ILAMB to be applied to regional models. -* User-defined regions can now be added at runtime either by specifying latitude/longitude bounds, or a mask in a netCDF4 file. For specifics, consult the regions `tutorial `_. -* Added a runoff and evaporative fraction benchmark to the ILAMB canon, removed the GFED3 and GFED4 burned area data products. -* Added many more plots to the generic output including the RMSE and the score maps. 
-* The ILAMB core has been enhanced to better handle depths. This has enabled ocean comparisons among others. -* An initial collection of ocean datasets has been assembled in the ``demo/iomb.cfg`` file for ocean benchmarking. -* The plotting phase of ``ilamb-run`` may now be skipped with the ``--skip_plots`` option. -* Relationship overall scores are now available in an image on the main html output page. -* Additional `tutorials `_ have been added to explain these new features. +We are pleased to announce version 2.3 of the ILAMB python +package. Among many bugfixes and improvements we highlight these major +changes: + +* You may observe a large shift in some score values. In this version + we solidified our scoring methodology while writing a `paper + `_ which necesitated + reworking some of the scores. For details, see the linked paper. +* Made a memory optimization pass through the analysis routines. Peak + memory usage and the time at peak was reduced improving performance. +* Restructured the symbolic manipulation of derived variables to + greatly reduce the required memory. +* Moved from using cfunits to cf_units. Both are python wrappers + around the UDUNITS library, but cfunits is stagnant and placed a + lower limit to the version of the netCDF4 python wrappers we could + use. +* The scoring of the interannual variability was missed in the port + from version 1 to 2, we have added the metric. +* The terrestrial water storage anomaly GRACE metric was changed to + compare mean anomaly values over large river basins. For details see + the ILAMB paper. + Funding ------- diff --git a/bin/ilamb-run b/bin/ilamb-run index a98e05fd..03f7b9d5 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -378,6 +378,7 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) sys.stdout.flush() + comm.Barrier() for c in C: if not skip_plots: try: @@ -476,7 +477,8 @@ parser.add_argument('--model_setup', dest="model_setup", type=str, nargs='+',def help='list files model setup information') parser.add_argument('--skip_plots', dest="skip_plots", action="store_true", help='enable to skip the plotting phase') - +parser.add_argument('--rel_only', dest="rel_only", action="store_true", + help='enable only display relative differences in overall scores') args = parser.parse_args() if args.config is None: if rank == 0: @@ -505,7 +507,8 @@ S = Scoreboard(args.config[0], master = rank==0, verbose = not args.quiet, build_dir = args.build_dir[0], - extents = RestrictiveModelExtents(M)) + extents = RestrictiveModelExtents(M), + rel_only = args.rel_only) C = MatchRelationshipConfrontation(S.list()) Cf = FilterConfrontationList(C,args.confront) @@ -520,7 +523,7 @@ if args.logging: if rank == 0: logger.info(" " + " ".join(os.uname())) - for key in ["ILAMB","numpy","matplotlib","netCDF4","cfunits","sympy","mpi4py"]: + for key in ["ILAMB","numpy","matplotlib","netCDF4","cf_units","sympy","mpi4py"]: pkg = __import__(key) try: path = pkg.__path__[0] diff --git a/bin/ilamb-table b/bin/ilamb-table new file mode 100644 index 00000000..374b6abb --- /dev/null +++ b/bin/ilamb-table @@ -0,0 +1,65 @@ +#!/usr/bin/env python +""" +""" +from ILAMB.Scoreboard import Scoreboard +from netCDF4 import Dataset +import os,argparse,sys + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, + help='path to configuration file to use') 
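# --------------------------------------------------------------------------
# Editor's note (illustrative, not part of the patch): with ILAMB_ROOT set and
# a build directory already populated by a previous ilamb-run, this new script
# is expected to be invoked roughly as
#
#     ilamb-table --config ilamb.cfg --build_dir ./_build --csv_file scores.csv
#
# It walks each confrontation's netCDF output, pulls the "RMSE global" scalar
# out of the MeanState group, and writes one CSV row per benchmark with a
# column per model.
# --------------------------------------------------------------------------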
+parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], + help='path of where to save the output') +parser.add_argument('--csv_file', dest="csv", metavar='csv', type=str, nargs=1,default=["table.csv"], + help='destination filename for the table') + +args = parser.parse_args() +if args.config is None: + print "\nError: You must specify a configuration file using the option --config\n" + sys.exit(1) + +S = Scoreboard(args.config[0],verbose=False,build_dir=args.build_dir[0]) + +region = "global" +scalar = "RMSE" +sname = "%s %s" % (scalar,region) +group = "MeanState" +table = {} +unit = {} +for c in S.list(): + for subdir, dirs, files in os.walk(c.output_path): + for fname in files: + if not fname.endswith(".nc"): continue + with Dataset(os.path.join(c.output_path,fname)) as dset: + if group not in dset.groups .keys(): continue + if "scalars" not in dset.groups[group].groups.keys(): continue + grp = dset.groups[group]["scalars"] + if sname not in grp.variables.keys(): continue + var = grp.variables[sname] + if not table.has_key(c.longname): + table[c.longname] = {} + unit [c.longname] = var.units + table[c.longname][dset.name] = var[...] + +# What models have data? +models = [] +for key in table.keys(): + for m in table[key].keys(): + if m not in models: models.append(m) +models.sort() + +# render a table of values in csv format +lines = ",".join(["Name","Units"] + models) +for c in S.list(): + if not table.has_key(c.longname): continue + line = "%s,%s" % (c.longname,unit[c.longname]) + for m in models: + if table[c.longname].has_key(m): + line += ",%g" % (table[c.longname][m]) + else: + line += "," + lines += "\n%s" % line + +with file(args.csv[0],mode="w") as f: + f.write(lines) + diff --git a/demo/ilamb.cfg b/demo/ilamb.cfg index 83692eb2..793227fd 100644 --- a/demo/ilamb.cfg +++ b/demo/ilamb.cfg @@ -284,6 +284,7 @@ skip_iav = True [h2: Terrestrial Water Storage Anomaly] variable = "twsa" alternate_vars = "tws" +derived = "pr-evspsbl-mrro" cmap = "Blues" weight = 5 ctype = "ConfTWSA" @@ -292,6 +293,31 @@ ctype = "ConfTWSA" source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" weight = 25 +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Snow Water Equivalent] +variable = "swe" +alternate_vars = "snw" +cmap = "Blues" +weight = 5 + +[CanSISE] +source = "DATA/swe/CanSISE/swe.nc" +weight = 25 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Permafrost] +variable = "tsl" + +[NSIDC] +ctype = "ConfPermafrost" +source = "DATA/permafrost/NSIDC/NSIDC_0.5x0.5.nc" +y0 = 1970. +yf = 2000. 
+Teps = 273.15 +dmax = 3.5 + ########################################################################### [h1: Radiation and Energy Cycle] diff --git a/doc/install.rst b/doc/install.rst index 7ccfaaf9..84a85043 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -25,7 +25,7 @@ include: * netCDF4_, a python/numpy interface to the netCDF C library (you must have the C library installed) * sympy_, a python library for symbolic mathematics * mpi4py_, a python wrapper around the MPI library (you must have a MPI implementation installed) -* cfunits_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) +* cf_units_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) I have designated that a few of these dependencies are python interfaces to C libraries and so the library must also be installed @@ -271,7 +271,7 @@ Next open the local copy of the file with a editor and search for .. _numpy: https://www.numpy.org/ .. _matplotlib: https://matplotlib.org/ .. _netCDF4: https://github.com/Unidata/netcdf4-python -.. _cfunits: https://bitbucket.org/cfpython/cfunits-python/ +.. _cf_units: https://github.com/SciTools/cf-units .. _basemap: https://github.com/matplotlib/basemap .. _sympy: https://www.sympy.org/ .. _mpi4py: https://pythonhosted.org/mpi4py/ diff --git a/setup.py b/setup.py index e36e7ff2..b189267f 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess import os -VERSION = '2.2' +VERSION = '2.3' def git_version(): """ @@ -97,12 +97,12 @@ def write_version_py(filename=os.path.join('src/ILAMB', 'generated_version.py')) keywords=['benchmarking','earth system modeling','climate modeling','model intercomparison'], packages=['ILAMB'], package_dir={'ILAMB' : 'src/ILAMB'}, - scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor'], - install_requires=['numpy>=1.9.2', + scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor','bin/ilamb-table'], + install_requires=['numpy>=1.11.0', 'matplotlib>=1.4.3', #'basemap>=1.0.7', # basemap is in pypi but broken, need to manually install - 'netCDF4>=1.1.4,<=1.2.4', # upper limit is for cfunits - 'cfunits>=1.1.4', + 'netCDF4>=1.1.4', + 'cf_units>=2.0.0', 'sympy>=0.7.6', 'mpi4py>=1.3.1', 'scipy>=0.9.0'] diff --git a/src/ILAMB/ConfPermafrost.py b/src/ILAMB/ConfPermafrost.py new file mode 100644 index 00000000..92c2ead4 --- /dev/null +++ b/src/ILAMB/ConfPermafrost.py @@ -0,0 +1,223 @@ +from Confrontation import Confrontation +from mpl_toolkits.basemap import Basemap +from Variable import Variable +from Post import ColorBar +import matplotlib.pyplot as plt +from netCDF4 import Dataset +import ilamblib as il +import numpy as np + +class ConfPermafrost(Confrontation): + + def __init__(self,**keywords): + + # Ugly, but this is how we call the Confrontation constructor + super(ConfPermafrost,self).__init__(**keywords) + + # Now we overwrite some things which are different here + self.layout + self.regions = ["global"] + self.layout.regions = self.regions + self.weight = { "Obs Score" : 1., + "Mod Score" : 1. 
} + for page in self.layout.pages: + page.setMetricPriority(["Total Area" , + "Overlap Area", + "Missed Area" , + "Excess Area" , + "Obs Score" , + "Mod Score" , + "Overall Score"]) + + def stageData(self,m): + + obs = Variable(filename = self.source, + variable_name = "permafrost_extent") + + # These parameters may be changed from the configure file + y0 = float(self.keywords.get("y0" ,1970.)) # [yr] beginning year to include in analysis + yf = float(self.keywords.get("yf" ,2000.)) # [yr] end year to include in analysis + dmax = float(self.keywords.get("dmax",3.5)) # [m] consider layers where depth in is the range [0,dmax] + Teps = float(self.keywords.get("Teps",273.15)) # [K] temperature below which we assume permafrost occurs + + t0 = (y0 -1850.)*365. + tf = (yf+1-1850.)*365. + mod = m.extractTimeSeries(self.variable, + initial_time = t0, + final_time = tf) + mod.trim(t = [t0 ,tf ], + lat = [obs.lat.min(),90 ], + d = [0 ,dmax]) + mod = mod.annualCycle() + Tmax = mod.data.max(axis=0) + table = np.zeros(Tmax.shape[-2:]) + table[...] = np.NAN + thaw = np.zeros(table.shape,dtype=bool) + for i in range(mod.depth_bnds.shape[0]-1,-1,-1): + thaw += (Tmax[i]>=Teps) + frozen = np.where((Tmax[i] 60: fsize = 10 - ax.set_ylabel(ylabel,fontsize=fsize) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() - - # add the figure to the HTML layout - if name == "Benchmark" and region == "global": - short_name = short_name.replace("global_","") - page.addFigure(c.longname, - "benchmark_" + short_name, - "Benchmark_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - - # determine the 1D relationship curves - bins = np.linspace(ind_min,ind_max,nbin+1) - delta = 0.1*(bins[1]-bins[0]) - inds = np.digitize(x,bins) - ids = np.unique(inds).clip(1,bins.size-1) - xb = [] - yb = [] - eb = [] - for i in ids: - yt = y[inds==i] - xi = 0.5 - xb.append(xi*bins[i-1]+(1.-xi)*bins[i]) - yb.append(yt.mean()) - try: - eb.append(yt.std()) # for some reason this fails sometimes - except: - eb.append(np.sqrt(((yt-yb[-1])**2).sum()/float(yt.size))) - - if name == "Benchmark": - obs_x = np.asarray(xb) - obs_y = np.asarray(yb) - obs_e = np.asarray(eb) - else: - mod_x = np.asarray(xb) - mod_y = np.asarray(yb) - mod_e = np.asarray(eb) - - # compute and plot the difference - O = np.array(obs_dist.data) - M = np.array(mod_dist.data) - O[np.where(obs_dist.mask)] = 0. - M[np.where(mod_dist.mask)] = 0. 
- dif_dist = np.ma.masked_array(M-O,mask=obs_dist.mask*mod_dist.mask) - lim = np.abs(dif_dist).max() - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - pc = ax.pcolormesh(xedges, - yedges, - dif_dist, - cmap = "Spectral_r", - vmin = -lim, - vmax = +lim) - div = make_axes_locatable(ax) - fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), - orientation="vertical", - label="Distribution Difference") - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_diff_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: + + # Grab/create a relationship and scalars group + group = None + if "Relationships" not in results.groups: + group = results.createGroup("Relationships") + else: + group = results.groups["Relationships"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each relationship... + for c in self.relationships: + + # try to get the independent data from the model and obs + try: + ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + ind_name = c.longname.split("/")[0] + ind_min = c.limits["timeint"]["min"]-1e-12 + ind_max = c.limits["timeint"]["max"]+1e-12 + except: + continue + + # Add figures to the html page page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "benchmark_rel_%s" % ind_name, + "Benchmark_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) - - # score the distributions = 1 - Hellinger distance - score = 1.-np.sqrt(((np.sqrt(obs_dist)-np.sqrt(mod_dist))**2).sum())/np.sqrt(2) - vname = '%s Score %s' % (c.longname.split('/')[0],region) - #if vname in scalars.variables: - # scalars.variables[vname][0] = score - #else: - # Variable(name = vname, - # unit = "1", - # data = score).toNetCDF4(results,group="Relationships") - - # plot the 1D curve - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - ax.errorbar(obs_x-delta,obs_y,yerr=obs_e,fmt='-o',color='k') - ax.errorbar(mod_x+delta,mod_y,yerr=mod_e,fmt='-o',color=m.color) - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_func_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "rel_%s" % ind_name, + "MNAME_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) + page.addFigure(c.longname, + "rel_diff_%s" % ind_name, + "MNAME_RNAME_rel_diff_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_func_%s" % ind_name, + "MNAME_RNAME_rel_func_%s.png" % ind_name, + legend = False, + benchmark = False) + + # Analysis over regions + lim_dep = [dep_min,dep_max] + lim_ind = [ind_min,ind_max] + longname = 
c.longname.split('/')[0] + for region in self.regions: + ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + + # Make the plots + _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), + "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) + _plotDistribution(com_dist[0],com_dist[1],com_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) + _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) + _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], + "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), + m.color, + os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) + + # Score the distribution + score = _scoreDistribution(ref_dist[0],com_dist[0]) + sname = "%s Hellinger Distance %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + # Score the functional response + score = _scoreFunction(ref_dist[3],com_dist[3]) + sname = "%s RMSE Score %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + - # score the relationship - i0,i1 = np.where(np.abs(obs_x[:,np.newaxis]-mod_x)<1e-12) - obs_y = obs_y[i0]; mod_y = mod_y[i1] - isnan = np.isnan(obs_y)*np.isnan(mod_y) - obs_y[isnan] = 0.; mod_y[isnan] = 0. 
- score = np.exp(-np.linalg.norm(obs_y-mod_y)/np.linalg.norm(obs_y)) - vname = '%s RMSE Score %s' % (c.longname.split('/')[0],region) - if vname in scalars.variables: - scalars.variables[vname][0] = score - else: - Variable(name = vname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - results.close() + class FileContextManager(): diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py index eff17ebf..6980d8b8 100644 --- a/src/ILAMB/ModelResult.py +++ b/src/ILAMB/ModelResult.py @@ -266,7 +266,6 @@ def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_ti """ from sympy import sympify - from cfunits import Units if expression is None: raise il.VarNotInModel() args = {} units = {} diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py index 502f9f12..809b154d 100644 --- a/src/ILAMB/Post.py +++ b/src/ILAMB/Post.py @@ -223,11 +223,19 @@ def __init__(self,name,title): def __str__(self): r = Regions() - def _sortFigures(figure,priority=["benchmark_timeint","timeint","timeintremap","bias","rmse","benchmark_phase","phase","shift","biasscore","rmsescore","shiftscore","spatial_variance","legend_spatial_variance","spaceint","accumulate","cycle","dtcycle","compcycle","temporal_variance"]): + def _sortFigures(figure): + macro = ["timeint","bias","rmse","iav","phase","shift","variance","spaceint","accumulate","cycle"] val = 1. - for i,pname in enumerate(priority): - if pname == figure.name: val += 2**i - return val + for i,m in enumerate(macro): + if m in figure.name: val += 3**i + if figure.name.startswith("benchmark"): val -= 1. + if figure.name.endswith("score"): val += 1. + if figure.name.startswith("legend"): + if "variance" in figure.name: + val += 1. + else: + val = 0. + return val code = """
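# --------------------------------------------------------------------------
# Editor's sketch (illustrative, not part of the patch): how the reworked
# _sortFigures ordering above behaves.  Substring matches against `macro` are
# weighted by powers of three, so a figure whose name matches a later entry
# always sorts after any combination of earlier ones; "benchmark_*" panels
# land just before their model counterparts and "*score" maps just after the
# field they score.
macro = ["timeint", "bias", "rmse", "iav", "phase", "shift", "variance",
         "spaceint", "accumulate", "cycle"]

def sort_key(name):
    val = 1.0
    for i, m in enumerate(macro):
        if m in name:
            val += 3 ** i
    if name.startswith("benchmark"):
        val -= 1.0
    if name.endswith("score"):
        val += 1.0
    if name.startswith("legend"):
        val = val + 1.0 if "variance" in name else 0.0
    return val

print(sorted(["rmse", "bias", "benchmark_timeint", "biasscore", "timeint"],
             key=sort_key))
# ['benchmark_timeint', 'timeint', 'bias', 'biasscore', 'rmse']
# --------------------------------------------------------------------------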
@@ -834,11 +842,14 @@ def head(self): class HtmlLayout(): def __init__(self,pages,cname,years=None): - + self.pages = pages self.cname = cname.replace("/"," / ") if years is not None: - self.cname += " / %d-%d" % (years) + try: + self.cname += " / %d-%d" % (years) + except: + pass for page in self.pages: page.pages = self.pages page.cname = self.cname @@ -1048,7 +1059,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa nvariables = len(variables) maxV = max([len(v) for v in variables]) maxM = max([len(m) for m in models]) - wpchar = 0.1 + wpchar = 0.15 wpcell = 0.19 hpcell = 0.25 w = maxV*wpchar + max(4,nmodels)*wpcell @@ -1085,6 +1096,8 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[0].set_yticklabels(variables[::-1]) ax[0].tick_params('both',length=0,width=0,which='major') ax[0].tick_params(axis='y',pad=10) + ax[0].set_xlim(0,nmodels) + ax[0].set_ylim(0,nvariables) if vcolor is not None: for i,t in enumerate(ax[0].yaxis.get_ticklabels()): t.set_backgroundcolor(vcolor[::-1][i]) @@ -1117,6 +1130,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[i].set_xticklabels(models,rotation=90) ax[i].tick_params('both',length=0,width=0,which='major') ax[i].set_yticks([]) + ax[i].set_xlim(0,nmodels) ax[i].set_ylim(0,nvariables) if rel_only: ax[i].set_yticks (np.arange(nvariables)+0.5) diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py index ba0b2cfb..5fceb929 100644 --- a/src/ILAMB/Scoreboard.py +++ b/src/ILAMB/Scoreboard.py @@ -5,6 +5,7 @@ from ConfEvapFraction import ConfEvapFraction from ConfIOMB import ConfIOMB from ConfDiurnal import ConfDiurnal +from ConfPermafrost import ConfPermafrost import os,re from netCDF4 import Dataset import numpy as np @@ -179,17 +180,19 @@ def ParseScoreboardConfigureFile(filename): "ConfRunoff" : ConfRunoff, "ConfEvapFraction": ConfEvapFraction, "ConfIOMB" : ConfIOMB, - "ConfDiurnal" : ConfDiurnal} + "ConfDiurnal" : ConfDiurnal, + "ConfPermafrost" : ConfPermafrost} class Scoreboard(): """ A class for managing confrontations """ - def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None): + def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None,rel_only=False): if not os.environ.has_key('ILAMB_ROOT'): raise ValueError("You must set the environment variable 'ILAMB_ROOT'") self.build_dir = build_dir + self.rel_only = rel_only if (master and not os.path.isdir(self.build_dir)): os.mkdir(self.build_dir) @@ -272,7 +275,7 @@ def createHtml(self,M,filename="index.html"): has_rel = np.asarray([len(rel.children) for rel in rel_tree.children]).sum() > 0 nav = "" if has_rel: - GenerateRelSummaryFigure(self,M,"%s/overview_rel.png" % self.build_dir) + GenerateRelSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir,rel_only=self.rel_only) nav = """
  • Relationship
  • """ #global global_print_node_string @@ -453,7 +456,7 @@ def createBarCharts(self,M): html = GenerateBarCharts(self.tree,M) def createSummaryFigure(self,M): - GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir) + GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir,rel_only=self.rel_only) def dumpScores(self,M,filename): out = file("%s/%s" % (self.build_dir,filename),"w") @@ -582,7 +585,7 @@ def GenerateTable(tree,M,S,composite=True): BuildHTMLTable(tree,M,S.build_dir) return global_html -def GenerateSummaryFigure(tree,M,filename): +def GenerateSummaryFigure(tree,M,filename,rel_only=False): models = [m.name for m in M] variables = [] @@ -602,96 +605,34 @@ def GenerateSummaryFigure(tree,M,filename): else: data[row,:] = var.score - BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors) - -def GenerateRelSummaryFigure(S,M,figname): - - def _parse(node): - global score,count,rows - if node.level != 5: return - row = "%s vs. %s" % (node.parent.parent.parent.name,node.parent.name) - col = node.name - if row not in rows: rows.append(row) - if not score .has_key(row): score[row] = {} - if not count .has_key(row): count[row] = {} - if not score[row].has_key(col): score[row][col] = 0. - if not count[row].has_key(col): count[row][col] = 0. - score[row][col] += node.score - count[row][col] += 1. - - class rnode(): - def __init__(self,name,level): - self.name = name - self.level = level - self.parent = None - self.score = None - self.children = [] - - - root = S.build_dir - tree = rnode("root",0) - previous_node = tree - current_level = 0 - - for subdir, dirs, files in os.walk(root): - if subdir == root: continue - flat = subdir.replace(root,"").lstrip("/").split("/") - level = len(flat) - name = flat[-1] - child = rnode(name,level) - if level == current_level: - child.parent = previous_node.parent - previous_node.parent.children.append(child) - if level == 3: - for fname in [f for f in files if f.endswith(".nc") and "Benchmark" not in f]: - with Dataset(os.path.join(subdir,fname)) as dset: - if "Relationships" not in dset.groups: continue - grp = dset.groups["Relationships"]["scalars"] - model = dset.name - for var in [var for var in grp.variables.keys() if ("Overall" not in var and - "global" in var)]: - rname = var.split(" ")[0] - hadrel = False - for c in child.children: - if c.name == rname: - rel = c - hadrel = True - if not hadrel: rel = rnode(rname,level+1) - mod = rnode(model,level+2) - mod.score = grp.variables[var][...] 
- mod.parent = rel - rel.children.append(mod) - rel.parent = child - if not hadrel: child.children.append(rel) - elif level > current_level: - child.parent = previous_node - previous_node.children.append(child) - current_level = level - else: - addto = tree - for i in range(level-1): addto = addto.children[-1] - child.parent = addto - addto.children.append(child) - current_level = level - previous_node = child - - global score,count,rows - score = {} - count = {} - rows = [] - TraversePreorder(tree,_parse) - models = [] - for row in rows: - for key in score[row].keys(): - if key not in models: models.append(key) - data = np.zeros((len(rows),len(models))) + BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors,rel_only=rel_only) + +def GenerateRelSummaryFigure(S,M,figname,rel_only=False): + + # reorganize the relationship data + scores = {} + counts = {} + rows = [] + vcolors = [] + for h1 in S.children: + for dep in h1.children: + dname = dep.name.split("/")[0] + for ind in dep.children: + iname = ind.name.split("/")[0] + key = "%s/%s" % (dname,iname) + if scores.has_key(key): + scores[key] += ind.score + counts[key] += 1. + else: + scores[key] = np.copy(ind.score) + counts[key] = 1. + rows .append(key) + vcolors.append(h1.bgcolor) + if len(rows) == 0: return + data = np.ma.zeros((len(rows),len(M))) for i,row in enumerate(rows): - for j,col in enumerate(models): - try: - data[i,j] = score[row][col] / count[row][col] - except: - data[i,j] = np.nan - BenchmarkSummaryFigure(models,rows,data,figname,rel_only=False) + data[i,:] = scores[row] / counts[row] + BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) def GenerateRelationshipTree(S,M): @@ -753,7 +694,6 @@ def GenerateRelationshipTree(S,M): if "Overall Score global" not in grp.variables.keys(): continue h2.score[i] = grp.variables["Overall Score global"][...] - return rel_tree diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 7d1f48b8..fcfa33f9 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -3,7 +3,7 @@ from mpl_toolkits.basemap import Basemap import matplotlib.colors as colors from pylab import get_cmap -from cfunits import Units +from cf_units import Unit import ilamblib as il import Post as post import numpy as np @@ -220,6 +220,17 @@ def __str__(self): return s + def nbytes(self): + r"""Estimate the memory usage of a variable in bytes. + """ + nbytes = 0. + for key in self.__dict__.keys(): + try: + nbytes += self.__dict__[key].nbytes + except: + pass + return nbytes + def integrateInTime(self,**keywords): r"""Integrates the variable over a given time period. 
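# --------------------------------------------------------------------------
# Editor's sketch (illustrative, not part of the patch): the translation
# pattern applied in the surrounding hunks when moving from cfunits to
# cf_units.  Both packages wrap UDUNITS-2, but conversion becomes a method on
# the source Unit rather than the static Units.conform() call.
import numpy as np
from cf_units import Unit

x = np.array([1.0, 2.5])                 # values in days

# cfunits (old):  x = Units.conform(x, Units("d"), Units("s"))
x = Unit("d").convert(x, Unit("s"))      # cf_units (new)

# unit algebra and convertibility checks carry over directly
flux = Unit("kg m-2 s-1") * Unit("s")
assert flux.is_convertible(Unit("kg m-2"))
# --------------------------------------------------------------------------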
@@ -286,7 +297,7 @@ def integrateInTime(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_time" if mean: @@ -300,18 +311,18 @@ def integrateInTime(self,**keywords): else: dt = dt.sum(axis=0) np.seterr(over='ignore',under='ignore') - integral /= dt + integral = integral / dt np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("d")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("d")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, lat = self.lat, lat_bnds = self.lat_bnds, @@ -403,7 +414,7 @@ def integrateInDepth(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_depth" if mean: @@ -417,18 +428,18 @@ def integrateInDepth(self,**keywords): else: dz = dz.sum(axis=axis) np.seterr(over='ignore',under='ignore') - integral /= dz + integral = integral / dz np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, time = self.time, time_bnds = self.time_bnds, @@ -521,13 +532,13 @@ def _integrate(var,areas): integral = _integrate(self.data,measure) if mean: np.seterr(under='ignore') - integral /= measure.sum() + integral = integral / measure.sum() np.seterr(under='raise') # handle the name and unit name = self.name + "_integrated_over_space" if region is not None: name = name.replace("space",region) - unit = Units(self.unit) + unit = Unit(self.unit) if mean: # we have already divided thru by the non-masked area in @@ -536,12 +547,12 @@ def _integrate(var,areas): else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m2")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m2")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = np.ma.masked_array(integral), - unit = unit.units, + unit = "%s" % unit, time = self.time, time_bnds = self.time_bnds, depth = self.depth, @@ -710,7 +721,7 @@ def _make_bnds(x): bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) return bnds - assert Units(var.unit) == Units(self.unit) + assert Unit(var.unit) == Unit(self.unit) assert self.temporal == False assert self.ndata == var.ndata assert self.layered == False @@ -752,7 +763,7 @@ def _make_bnds(x): def convert(self,unit,density=998.2): """Convert the variable to a given unit. - We use the UDUNITS library via the cfunits python interface to + We use the UDUNITS library via the cf_units python interface to convert the variable's unit. Additional support is provided for unit conversions in which substance information is required. 
For example, in quantities such as precipitation it @@ -777,53 +788,53 @@ def convert(self,unit,density=998.2): this object with its unit converted """ - src_unit = Units(self.unit) - tar_unit = Units( unit) + if unit is None: return self + src_unit = Unit(self.unit) + tar_unit = Unit( unit) mask = self.data.mask # Define some generic quantities - linear = Units("m") - linear_rate = Units("m s-1") - area_density = Units("kg m-2") - area_density_rate = Units("kg m-2 s-1") - mass_density = Units("kg m-3") - volume_conc = Units("mol m-3") - mass_conc = Units("mol kg-1") - - # cfunits doesn't handle frequently found temperature expressions + linear = Unit("m") + linear_rate = Unit("m s-1") + area_density = Unit("kg m-2") + area_density_rate = Unit("kg m-2 s-1") + mass_density = Unit("kg m-3") + volume_conc = Unit("mol m-3") + mass_conc = Unit("mol kg-1") + + # UDUNITS doesn't handle frequently found temperature expressions synonyms = {"K":"degK", "R":"degR", "C":"degC", "F":"degF"} for syn in synonyms.keys(): - if src_unit.units == syn: src_unit = Units(synonyms[syn]) - if tar_unit.units == syn: tar_unit = Units(synonyms[syn]) + if src_unit.format() == syn: src_unit = Unit(synonyms[syn]) + if tar_unit.format() == syn: tar_unit = Unit(synonyms[syn]) # Do we need to multiply by density? - if ( (src_unit.equivalent(linear_rate) and tar_unit.equivalent(area_density_rate)) or - (src_unit.equivalent(linear ) and tar_unit.equivalent(area_density )) or - (src_unit.equivalent(mass_conc ) and tar_unit.equivalent(volume_conc )) ): + if ( (src_unit.is_convertible(linear_rate) and tar_unit.is_convertible(area_density_rate)) or + (src_unit.is_convertible(linear ) and tar_unit.is_convertible(area_density )) or + (src_unit.is_convertible(mass_conc ) and tar_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') self.data *= density np.seterr(over='raise',under='raise') src_unit *= mass_density # Do we need to divide by density? 
- if ( (tar_unit.equivalent(linear_rate) and src_unit.equivalent(area_density_rate)) or - (tar_unit.equivalent(linear ) and src_unit.equivalent(area_density )) or - (tar_unit.equivalent(mass_conc ) and src_unit.equivalent(volume_conc )) ): + if ( (tar_unit.is_convertible(linear_rate) and src_unit.is_convertible(area_density_rate)) or + (tar_unit.is_convertible(linear ) and src_unit.is_convertible(area_density )) or + (tar_unit.is_convertible(mass_conc ) and src_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') - self.data /= density + self.data = self.data / density np.seterr(over='raise',under='raise') - src_unit /= mass_density + src_unit = src_unit / mass_density # Convert units try: - self.data = Units.conform(self.data,src_unit,tar_unit) + self.data = src_unit.convert(self.data,tar_unit) self.data = np.ma.masked_array(self.data,mask=mask) self.unit = unit except: - print "var_name = %s, src_unit = %s, target_unit = %s " % (self.name,src_unit,tar_unit) raise il.UnitConversionError() return self @@ -1599,7 +1610,7 @@ def spatialDistribution(self,var,region="global"): R0 = 1.0 std0 = std0.clip(1e-12) std = std .clip(1e-12) - std /= std0 + std = std/std0 score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) except: std = np.asarray([0.0]) diff --git a/src/ILAMB/__init__.py b/src/ILAMB/__init__.py index f8ea82b9..3bb5124a 100644 --- a/src/ILAMB/__init__.py +++ b/src/ILAMB/__init__.py @@ -1,6 +1,6 @@ __author__ = 'Nathan Collier' -__date__ = 'Nov 2017' -__version__ = '2.2' +__date__ = 'Jun 2018' +__version__ = '2.3' from distutils.version import LooseVersion import platform @@ -10,7 +10,7 @@ "numpy" : "1.9.2", "matplotlib" : "1.4.3", "netCDF4" : "1.1.4", - "cfunits" : "1.1.4", + "cf_units" : "2.0.0", "mpl_toolkits.basemap" : "1.0.7", "sympy" : "0.7.6", "mpi4py" : "1.3.1" diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py index 714209d8..8d77b8f6 100644 --- a/src/ILAMB/constants.py +++ b/src/ILAMB/constants.py @@ -130,6 +130,28 @@ "sidelbl" :"RMSE SCORE", "haslegend" :True } +space_opts["iav"] = { "name" :"Interannual variability", + "cmap" :"Reds", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iav.png", + "sidelbl" :"MODEL INTERANNUAL VARIABILITY", + "haslegend" :True } + +space_opts["iavscore"] = { "name" :"Interannual variability score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iavscore.png", + "sidelbl" :"INTERANNUAL VARIABILITY SCORE", + "haslegend" :True } + space_opts["shift"] = { "name" :"Temporally integrated mean phase shift", "cmap" :"PRGn", "sym" :True, diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 1304be6a..b7cff096 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -3,11 +3,11 @@ from Regions import Regions from netCDF4 import Dataset,num2date,date2num from datetime import datetime -from cfunits import Units +from cf_units import Unit from copy import deepcopy from mpi4py import MPI import numpy as np -import logging +import logging,re logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) @@ -53,7 +53,30 @@ def __str__(self): return "NotLayeredVariable" class NotDatasiteVariable(Exception): def __str__(self): return "NotDatasiteVariable" +def FixDumbUnits(unit): + r"""Try to fix the dumb units people insist on using. 
+ Parameters + ---------- + unit : str + the trial unit + + Returns + ------- + unit : str + the fixed unit + """ + # Various synonyms for 1 + if unit.lower().strip() in ["unitless", + "n/a", + "none"]: unit = "1" + # Remove the C which so often is used to mean carbon but actually means coulomb + tokens = re.findall(r"[\w']+", unit) + for token in tokens: + if token.endswith("C") and Unit(token[:-1]).is_convertible(Unit("g")): + unit = unit.replace(token,token[:-1]) + return unit + def GenerateDistinctColors(N,saturation=0.67,value=0.67): r"""Generates a series of distinct colors. @@ -86,7 +109,7 @@ def ConvertCalendar(t,tbnd=None): This routine converts the representation of time to the ILAMB default: days since 1850-1-1 00:00:00 on a 365-day calendar. This is so we can make comparisons with data from other models and - benchmarks. We use cfunits time conversion capability. + benchmarks. Parameters ---------- @@ -343,71 +366,51 @@ def SympifyWithArgsUnits(expression,args,units): """ from sympy import sympify,postorder_traversal - # The traversal needs that we make units commensurate when - # possible - keys = args.keys() - for i in range(len(keys)): - ikey = keys[i] - for j in range(i+1,len(keys)): - jkey = keys[j] - if Units(units[jkey]).equivalent(Units(units[ikey])): - args [jkey] = Units.conform(args[jkey], - Units(units[jkey]), - Units(units[ikey]), - inplace=True) - units[jkey] = units[ikey] - - # We need to do what sympify does but also with unit - # conversions. So we traverse the expression tree in post order - # and take actions based on the kind of operation being performed. expression = sympify(expression) + + # try to convert all arguments to same units if possible, it + # catches most use cases + keys = args.keys() + for i,key0 in enumerate(keys): + for key in keys[(i+1):]: + try: + Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True) + units[key] = units[key0] + except: + pass + for expr in postorder_traversal(expression): - - if expr.is_Atom: continue - ekey = str(expr) # expression key - + ekey = str(expr) if expr.is_Add: - # Addition will require that all args should be the same - # unit. As a convention, we will try to conform all units - # to the first variable's units. - key0 = None - for arg in expr.args: - key = str(arg) - if not args.has_key(key): continue - if key0 is None: - key0 = key - else: - # Conform these units to the units of the first arg - Units.conform(args[key], - Units(units[key]), - Units(units[key0]), - inplace=True) - units[key] = units[key0] - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = units[key0] + # if there are scalars in the expression, these will not + # be in the units dictionary. Add them and give them an + # implicit unit of 1 + keys = [str(arg) for arg in expr.args] + for key in keys: + if not units.has_key(key): units[key] = "1" - elif expr.is_Pow: + # if we are adding, all arguments must have the same unit. 
+ key0 = keys[0] + for key in keys: + Unit(units[key]).convert(np.ones(1),Unit(units[key0])) + units[key] = units[key0] + units[ekey] = "%s" % (units[key0]) - assert len(expr.args) == 2 # check on an assumption - power = float(expr.args[1]) - args [ekey] = args[str(expr.args[0])]**power - units[ekey] = Units(units[str(expr.args[0])]) - units[ekey] = units[ekey]**power - - elif expr.is_Mul: + elif expr.is_Pow: - unit = Units("1") - for arg in expr.args: - key = str(arg) - if units.has_key(key): unit *= Units(units[key]) - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = Units(unit).formatted() + # if raising to a power, just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = "(%s)%s" % (units[keys[0]],keys[1]) - return args[ekey],units[ekey] + elif expr.is_Mul: + + # just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = " ".join(["(%s)" % units[key] for key in keys if units.has_key(key)]) + return sympify(str(expression),locals=args),units[ekey] + def ComputeIndexingArrays(lat2d,lon2d,lat,lon): """Blah. @@ -635,11 +638,11 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if depth_bnd_name is not None: depth_bnd = grp.variables[depth_bnd_name][...] if dunit is not None: - if not Units(dunit).equivalent(Units("m")): + if not Unit(dunit).is_convertible(Unit("m")): raise ValueError("Non-linear units [%s] of the layered dimension [%s] in %s" % (dunit,depth_name,filename)) - depth = Units.conform(depth,Units(dunit),Units("m"),inplace=True) + depth = Unit(dunit).convert(depth,Unit("m"),inplace=True) if depth_bnd is not None: - depth_bnd = Units.conform(depth_bnd,Units(dunit),Units("m"),inplace=True) + depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) if data_name is not None: data = len(grp.dimensions[data_name]) @@ -701,16 +704,15 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12) v = np.ma.masked_array(v,mask=mask,copy=False) - # handle units problems that cfunits doesn't if "units" in var.ncattrs(): - units = var.units.replace("unitless","1") + units = FixDumbUnits(var.units) else: units = "1" dset.close() return v,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data -def Score(var,normalizer,FC=0.999999): +def Score(var,normalizer): """Remaps a normalized variable to the interval [0,1]. Parameters @@ -726,16 +728,7 @@ def Score(var,normalizer,FC=0.999999): name = name.replace("rmse","rmse_score") name = name.replace("iav" ,"iav_score") np.seterr(over='ignore',under='ignore') - - data = None - if "bias" in var.name or "diff" in var.name: - deno = np.ma.copy(normalizer.data) - if (deno.size - deno.mask.sum()) > 1: deno -= deno.min()*FC - data = np.exp(-np.abs(var.data/deno)) - elif "rmse" in var.name: - data = np.exp(-var.data/normalizer.data) - elif "iav" in var.name: - data = np.exp(-np.abs(var.data/normalizer.data)) + data = np.exp(-np.abs(var.data/normalizer.data)) data[data<1e-16] = 0. np.seterr(over='raise',under='raise') return Variable(name = name, @@ -810,7 +803,7 @@ def _composeGrids(v1,v2): lon = lon_bnds.mean(axis=1) return lat,lon,lat_bnds,lon_bnds -def AnalysisMeanState(ref,com,**keywords): +def AnalysisMeanStateSites(ref,com,**keywords): """Perform a mean state analysis. 
This mean state analysis examines the model mean state in space @@ -848,6 +841,7 @@ def AnalysisMeanState(ref,com,**keywords): the unit to use when displaying output on plots on the HTML page """ + from Variable import Variable regions = keywords.get("regions" ,["global"]) dataset = keywords.get("dataset" ,None) @@ -860,14 +854,14 @@ def AnalysisMeanState(ref,com,**keywords): skip_iav = keywords.get("skip_iav" ,False) skip_cycle = keywords.get("skip_cycle" ,False) ILAMBregions = Regions() - spatial = ref.spatial + spatial = False normalizer = None # Only study the annual cycle if it makes sense if not ref.monthly: skip_cycle = True if ref.time.size < 12: skip_cycle = True - - # We find + if skip_rmse : skip_iav = True + if spatial: lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) @@ -926,14 +920,58 @@ def AnalysisMeanState(ref,com,**keywords): # Compute the bias, RMSE, and RMS maps using the interpolated # quantities bias = REF_timeint.bias(COM_timeint) - bias_score_map = Score(bias,REF_timeint) + cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata) + crms = cREF.rms () + bias_score_map = Score(bias,crms) if spatial: bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask if not skip_rmse: - rmse = REF.rmse(COM) - rms = REF.rms () - rmse_score_map = Score(rmse,rms) - + cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata) + rmse = REF.rmse( COM) + crmse = cREF.rmse(cCOM) + rmse_score_map = Score(crmse,crms) + if not skip_iav: + ref_iav = Variable(name = "centralized %s" % ref.name, unit = ref.unit, + data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask), + time = ref.time, time_bnds = ref.time_bnds, + lat = ref.lat , lat_bnds = ref.lat_bnds, + lon = ref.lon , lon_bnds = ref.lon_bnds, + area = ref.area, ndata = ref.ndata).rms() + com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit, + data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask), + time = com.time, time_bnds = com.time_bnds, + lat = com.lat , lat_bnds = com.lat_bnds, + lon = com.lon , lon_bnds = com.lon_bnds, + area = com.area, ndata = com.ndata).rms() + REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata).rms() + COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata).rms() + iav_score_map = Score(Variable(name = "diff %s" % REF.name, 
unit = REF.unit, + data = (COM_iav.data-REF_iav.data), + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata), + REF_iav) + # The phase shift comes from the interpolated quantities if not skip_cycle: ref_cycle = REF.annualCycle() @@ -948,7 +986,7 @@ def AnalysisMeanState(ref,com,**keywords): ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {} com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {} bias_val = {}; bias_score = {}; rmse_val = {}; rmse_score = {} - space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {} + space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {} ref_union_mean = {}; ref_comp_mean = {} com_union_mean = {}; com_comp_mean = {} for region in regions: @@ -975,6 +1013,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .integrateInSpace(region=region,mean=True) rmse_score [region] = rmse_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) space_std[region],space_cor[region],sd_score[region] = REF_timeint.spatialDistribution(COM_timeint,region=region) else: ref_period_mean[region] = ref_timeint .siteStats(region=region) @@ -995,6 +1035,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .siteStats(region=region) rmse_score [region] = rmse_score_map .siteStats(region=region,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer) ref_period_mean[region].name = "Period Mean (original grids) %s" % (region) ref_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) @@ -1005,6 +1047,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region].name = "RMSE %s" % (region) rmse_score [region].name = "RMSE Score %s" % (region) + if not skip_iav: + iav_score [region].name = "Interannual Variability Score %s" % (region) if not skip_cycle: ref_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) ref_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) @@ -1033,6 +1077,7 @@ def _convert(var,unit): plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val] if not skip_rmse: plot_vars += [rmse,rmse_val] if not skip_cycle: plot_vars += [com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] + if not skip_iav: plot_vars += [com_iav] for var in plot_vars: _convert(var,plot_unit) # Rename and optionally dump out information to netCDF4 files @@ -1064,13 +1109,17 @@ def _convert(var,unit): out_vars.append(shift_score_map) if not skip_rmse: rmse .name = "rmse_map_of_%s" % ref.name - rms .name = "rms_map_of_%s" % ref.name rmse_score_map.name = "rmsescore_map_of_%s" % ref.name out_vars.append(rmse) - out_vars.append(rms ) out_vars.append(rmse_score_map) out_vars.append(rmse_val) out_vars.append(rmse_score) + if not skip_iav: + com_iav.name = "iav_map_of_%s" % ref.name + iav_score_map.name = "iavscore_map_of_%s" % ref.name + out_vars.append(com_iav) + out_vars.append(iav_score_map) + out_vars.append(iav_score) if dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1089,6 +1138,9 @@ def _convert(var,unit): if not skip_cycle: ref_maxt_map.name = "phase_map_of_%s" % ref.name out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle] + if not skip_iav: + 
ref_iav.name = "iav_map_of_%s" % ref.name + out_vars.append(ref_iav) if benchmark_dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1097,124 +1149,303 @@ def _convert(var,unit): var.toNetCDF4(benchmark_dataset,group="MeanState") return - -def AnalysisRelationship(dep_var,ind_var,dataset,rname,**keywords): - """Perform a relationship analysis. - - Expand to provide details of what exactly is done. + +def AnalysisMeanStateSpace(ref,com,**keywords): + """Perform a mean state analysis. + + This mean state analysis examines the model mean state in space + and time. We compute the mean variable value over the time period + at each spatial cell or data site as appropriate, as well as the + bias and RMSE relative to the observational variable. We will + output maps of the period mean values and bias. For each spatial + cell or data site we also estimate the phase of the variable by + finding the mean time of year when the maximum occurs and the + phase shift by computing the difference in phase with respect to + the observational variable. In the spatial dimension, we compute a + spatial mean for each of the desired regions and an average annual + cycle. Parameters ---------- - dep_var : ILAMB.Variable.Variable - the dependent variable - ind_var : ILAMB.Variable.Variable - the independent variable - dataset : netCDF4.Dataset + obs : ILAMB.Variable.Variable + the observational (reference) variable + mod : ILAMB.Variable.Variable + the model (comparison) variable + regions : list of str, optional + the regions overwhich to apply the analysis + dataset : netCDF4.Dataset, optional a open dataset in write mode for caching the results of the analysis which pertain to the model - rname : str - the name of the relationship under study - regions : list of str, optional - a list of units over which to apply the analysis - dep_plot_unit,ind_plot_unit : str, optional - the name of the unit to use in the plots found on the HTML output - - """ - def _extractMaxTemporalOverlap(v1,v2): # should move? - t0 = max(v1.time.min(),v2.time.min()) - tf = min(v1.time.max(),v2.time.max()) - for v in [v1,v2]: - begin = np.argmin(np.abs(v.time-t0)) - end = np.argmin(np.abs(v.time-tf))+1 - v.time = v.time[begin:end] - v.data = v.data[begin:end,...] 
- mask = v1.data.mask + v2.data.mask - v1 = v1.data[mask==0].flatten() - v2 = v2.data[mask==0].flatten() - return v1,v2 - - # grab regions - regions = keywords.get("regions",["global"]) + benchmark_dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the observations + space_mean : bool, optional + disable to compute sums of the variable over space instead of + mean values + table_unit : str, optional + the unit to use when displaying output in tables on the HTML page + plots_unit : str, optional + the unit to use when displaying output on plots on the HTML page + + """ + from Variable import Variable + regions = keywords.get("regions" ,["global"]) + dataset = keywords.get("dataset" ,None) + benchmark_dataset = keywords.get("benchmark_dataset",None) + space_mean = keywords.get("space_mean" ,True) + table_unit = keywords.get("table_unit" ,None) + plot_unit = keywords.get("plot_unit" ,None) + mass_weighting = keywords.get("mass_weighting" ,False) + skip_rmse = keywords.get("skip_rmse" ,False) + skip_iav = keywords.get("skip_iav" ,False) + skip_cycle = keywords.get("skip_cycle" ,False) + ILAMBregions = Regions() + spatial = ref.spatial + + # Convert str types to booleans + if type(skip_rmse) == type(""): + skip_rmse = (skip_rmse.lower() == "true") + if type(skip_iav ) == type(""): + skip_iav = (skip_iav .lower() == "true") + if type(skip_cycle) == type(""): + skip_cycle = (skip_cycle.lower() == "true") - # convert to plot units - dep_plot_unit = keywords.get("dep_plot_unit",dep_var.unit) - ind_plot_unit = keywords.get("ind_plot_unit",ind_var.unit) - if dep_plot_unit is not None: dep_var.convert(dep_plot_unit) - if ind_plot_unit is not None: ind_var.convert(ind_plot_unit) - - # if the variables are temporal, we need to get period means - if dep_var.temporal: dep_var = dep_var.integrateInTime(mean=True) - if ind_var.temporal: ind_var = ind_var.integrateInTime(mean=True) - mask = dep_var.data.mask + ind_var.data.mask - - # analysis over regions - for region in regions: + # Check if we need to skip parts of the analysis + if not ref.monthly : skip_cycle = True + if ref.time.size < 12: skip_cycle = True + if ref.time.size == 1: skip_rmse = True + if skip_rmse : skip_iav = True + name = ref.name + + # Interpolate both reference and comparison to a grid composed of + # their cell breaks + ref.convert(plot_unit) + com.convert(plot_unit) + lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) + REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + unit = REF.unit + area = REF.area + ndata = REF.ndata + + # Find the mean values over the time period + ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) + com_timeint = com.integrateInTime(mean=True).convert(plot_unit) + REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) + COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) + normalizer = REF_timeint.data if mass_weighting else None + + # Report period mean values over all possible representations of + # land + ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) + ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) + com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) + if benchmark_dataset is not None: - lats,lons = ILAMBregions[region] - rmask = (np.outer((dep_var.lat>lats[0])*(dep_var.latlons[0])*(dep_var.lon 1 
else REF_timeint) + bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + if dataset is not None: + bias.name = "bias_map_of_%s" % name + bias.toNetCDF4(dataset,group="MeanState") + bias_score_map.name = "biasscore_map_of_%s" % name + bias_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) + bias_val.name = "Bias %s" % region + bias_val.toNetCDF4(dataset,group="MeanState") + bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + bias_score.name = "Bias Score %s" % region + bias_score.toNetCDF4(dataset,group="MeanState") + del bias,bias_score_map + + # Spatial mean: plots + if REF.time.size > 1: + if benchmark_dataset is not None: + for region in regions: + ref_spaceint = REF.integrateInSpace(region=region,mean=True) + ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + for region in regions: + com_spaceint = COM.integrateInSpace(region=region,mean=True) + com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + com_spaceint.toNetCDF4(dataset,group="MeanState") + + # RMSE: maps, scalars, and scores + if not skip_rmse: + rmse = REF.rmse(COM).convert(plot_unit) + del REF + cCOM = Variable(name = "centralized %s" % name, unit = unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM.area, ndata = COM.ndata).convert(plot_unit) + del COM + crmse = cREF.rmse(cCOM).convert(plot_unit) + del cREF + if skip_iav: del cCOM + rmse_score_map = Score(crmse,REF_iav) + if dataset is not None: + rmse.name = "rmse_map_of_%s" % name + rmse.toNetCDF4(dataset,group="MeanState") + rmse_score_map.name = "rmsescore_map_of_%s" % name + rmse_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) + rmse_val.name = "RMSE %s" % region + rmse_val.toNetCDF4(dataset,group="MeanState") + rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + rmse_score.name = "RMSE Score %s" % region + rmse_score.toNetCDF4(dataset,group="MeanState") + del rmse,crmse,rmse_score_map + + # IAV: maps, scalars, scores + if not skip_iav: + COM_iav = cCOM.rms() + del cCOM + iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit, + data = (COM_iav.data-REF_iav.data), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = area, ndata = ndata), + REF_iav) + if benchmark_dataset is not None: + REF_iav.name = "iav_map_of_%s" % name + REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + COM_iav.name = "iav_map_of_%s" % name + COM_iav.toNetCDF4(dataset,group="MeanState") + iav_score_map.name = "iavscore_map_of_%s" % name + iav_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + iav_score.name = "Interannual Variability Score %s" % region + iav_score.toNetCDF4(dataset,group="MeanState") + del COM_iav,iav_score_map + del REF_iav + + return def ClipTime(v,t0,tf): """Remove time from a variable based on input bounds. 
@@ -1300,10 +1531,10 @@ def MakeComparable(ref,com,**keywords): # If the reference is spatial, the comparison must be if ref.spatial and not com.spatial: - msg = "%s Datasets are not uniformly spatial: " % logstring - msg += "reference = %s, comparison = %s" % (ref.spatial,com.spatial) - logger.debug(msg) - raise VarsNotComparable() + ref = ref.extractDatasites(com.lat,com.lon) + msg = "%s The reference dataset is spatial but the comparison is site-based. " % logstring + msg += "Extracted %s sites from the reference to match the comparison." % ref.ndata + logger.info(msg) # If the reference is layered, the comparison must be if ref.layered and not com.layered: @@ -1383,7 +1614,7 @@ def MakeComparable(ref,com,**keywords): # comparison, coarsen the comparison if np.log10(ref.dt/com.dt) > 0.5: com = com.coarsenInTime(ref.time_bnds,window=window) - + # Time bounds of the reference dataset t0 = ref.time_bnds[ 0,0] tf = ref.time_bnds[-1,1] diff --git a/test/scores_test.csv.gold b/test/scores_test.csv.gold index 6d65ab7b..6fccfb93 100644 --- a/test/scores_test.csv.gold +++ b/test/scores_test.csv.gold @@ -1,9 +1,9 @@ Variables,CLM50r243CRUNCEP,CLM50r243GSWP3 -Biomass,0.595710463937,0.678304573522 -Gross Primary Productivity,0.753476728464,0.741270301037 -Global Net Ecosystem Carbon Balance,0.705400063727,0.863669079462 -Net Ecosystem Exchange,0.524058275106,0.504338904659 -Terrestrial Water Storage Anomaly,0.484015616221,0.470205924215 -Albedo,0.771776381299,0.774604472682 -Surface Air Temperature,0.988457088529,0.990624010352 -Precipitation,0.812343937554,0.824581872315 +Biomass,0.5957104653413856,0.6783045750117078 +Gross Primary Productivity,0.6217211297637607,0.6126273585798891 +Global Net Ecosystem Carbon Balance,0.7054000637266042,0.8636690794621101 +Net Ecosystem Exchange,0.3941918077804778,0.38120476926634617 +Terrestrial Water Storage Anomaly,0.7000653021257858,0.7269702240175762 +Albedo,0.5434663466148166,0.544587485316599 +Surface Air Temperature,0.9256731031865132,0.9314748385926337 +Precipitation,0.7555153501937276,0.7679655805094326 From cac2eedc7c959c19e0d1bc760401c7d687dcad70 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 3 Jul 2018 14:35:43 -0600 Subject: [PATCH 05/22] Squashed 'ilamb/ilamb/' changes from 5f35ec3..c692c08 REVERT: 5f35ec3 cf_units requires more recent numpy but does not have it in their requirements REVERT: 4bf570e 2.3 release REVERT: f1ce475 doc cleanup from cfunits move REVERT: 3c88a3a move from cfunits to cf_units REVERT: 515cfb1 added initial logic will excludes outliers when computing functional relationships REVERT: eed6fb0 fixed a bug which would cause a crash if non-string global attributes were found in the netCDF files. 
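For readers tracking the methodology change behind the gold-score shifts above: the revised scoring in the ilamblib.py hunks maps an error field onto [0,1] as exp(-|error|/normalizer), where the normalizer is the RMS of the reference anomaly about its time mean (the "centralized" variable), i.e. bias_score_map = Score(bias, crms). A minimal numpy sketch of that calculation, using hypothetical array names rather than ILAMB's Variable objects:

    import numpy as np

    # hypothetical stand-ins: ref/com are (time, lat, lon) arrays
    ref = np.random.rand(120, 10, 20)
    com = ref + 0.1 * np.random.randn(120, 10, 20)

    ref_mean = ref.mean(axis=0)                               # period mean of the reference
    crms     = np.sqrt(((ref - ref_mean) ** 2).mean(axis=0))  # RMS of the centralized reference
    bias     = com.mean(axis=0) - ref_mean                    # bias of the period means
    bias_score_map = np.exp(-np.abs(bias) / crms)             # analogous to Score(bias, crms)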
Thanks to Declan Valters for the report REVERT: 9197e39 added plot limits to the summary images REVERT: 9af8e66 added a derived expression for twsa and required code support REVERT: 919c5d6 updated the gold scores, fixed an error in handling the bias score in biomass, large differences possible REVERT: 69d7d85 force a mask REVERT: 0ab4b6c fixed some unit issues REVERT: 70d6042 moved where some scalars are saved in the analysis REVERT: 3348b64 neglected to put writes where we can be sure datasets are defined REVERT: f59b823 restored site analysis, needs revamped REVERT: 2c3136a finished memory refactor on the spatial analysis REVERT: 78ce3e5 removed a duplicated function definition REVERT: 0fb6a7f first pass at memory optimzation REVERT: 40fa2e7 added a pass functionality to unit conversions REVERT: 3968d67 added a nbytes routine to determine the size of Variables REVERT: b152fd9 rework of sympifywithargs to reduce memory usage REVERT: a9cefa0 changes to scores based on added iav REVERT: 922d9ac removed numpy restriction REVERT: cb7f921 removed /= because it is causing problems with numpy 1.14 REVERT: c454c2d restored iav to the analysis REVERT: b82ffa0 Merge branch 'ncollier/grace-change' REVERT: 3673d67 changes in gold score to reflect methodology change REVERT: 2420b0c change of directory name to reflect a last name and not a acronym REVERT: 0a7636c added an attempt to purge C for carbon in units REVERT: 23fdf42 added site plots to runoff and fixed twsa/runoff plots to show by basin in the all models tab REVERT: acbdaca small fix to location of a dataset REVERT: 49d8eab added a permafrost extent comparison REVERT: 67c86b4 added a snow water equivalent dataset REVERT: d110066 Merge branch 'master' of bitbucket.org:ncollier/ilamb REVERT: 75a417a numpy 1.14.0 was found to give an error when using the /= operator on masked arrays. For now limiting the upper limit on the version REVERT: f6c7692 Relaxed a requirement that the reference and comparison datasets must be uniformly spatial. This allows us to use a spatial observational dataset to compare against models run at sites. 
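The f6c7692 change noted above corresponds to the MakeComparable hunk in the previous patch, where a spatial reference is now reduced to the comparison's data sites via ref.extractDatasites(com.lat, com.lon) rather than raising VarsNotComparable. Conceptually this is a nearest-grid-cell lookup; a rough, standalone numpy sketch (the function and argument names here are illustrative, not ILAMB's API):

    import numpy as np

    def extract_nearest(data, lat, lon, site_lat, site_lon):
        """Sample data(time, lat, lon) at the grid cell nearest each site."""
        i = np.abs(lat[:, np.newaxis] - site_lat).argmin(axis=0)  # nearest latitude index per site
        j = np.abs(lon[:, np.newaxis] - site_lon).argmin(axis=0)  # nearest longitude index per site
        return data[:, i, j]                                      # shape (time, nsites)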
REVERT: d81f144 added a reference explaining why we have changed the methodology REVERT: 7a2e751 changes to grace methodology based on Swenson's suggestions REVERT: f688075 missed a barrier which can be problematic when trying to render composite plots and compute relationships REVERT: 3fae4d8 wrong import REVERT: d58c5e4 changes in gold scores due to method change REVERT: 4997958 rework and abstraction of relationship analysis, fixed warnings in plot generation REVERT: e5d50a4 added more horizontal width per variable label length REVERT: d19c523 added a --rel_only option to ilamb-run that will render summary plots without absolute scores REVERT: 2968f2e safety if no relationships exist REVERT: e4857ed fixed relationship plot REVERT: ce53afd first pass at a tool for extracting scalar scores from files in csv format REVERT: 4aa220d shifts in scoring of bias and rmse REVERT: 3648557 conditional if no relationships exist REVERT: f46c647 Merge branch 'master' of bitbucket.org:ncollier/ilamb REVERT: 369c433 added an option for plotting a colorbar directly on the map plots REVERT: 79a6eeb added a check on the units of the depth dimension from netCDF4 files REVERT: c805be1 enabled plot_unit and table_unit in IOMB configure files REVERT: eb60316 improved heuristics of summary plots REVERT: 174431f fixed scores, reverting back to what they were a few commits ago REVERT: 911e39c missed a conditional REVERT: 8efb687 need to force this mask for some reason REVERT: 49da324 apply intersection mask directly to variables, use the intersection for the regional means REVERT: afb2091 mispelling REVERT: 9c21a55 added protection for if no scores are foudn in the metrics table REVERT: 0ebb8f9 added some safety to guard against nan's appearing in the functional relationship scoring process REVERT: 903baf9 land areas may be masked and cause overflow errors due to large marked values (1e23), added ignores across multiply REVERT: 3db7f84 revamped the relationship summary figure REVERT: 0b8d777 fixed an issue in allmodels pages when no legends were present in figures REVERT: e55f41b added a composite accumulation plot to nbp REVERT: 251972e updated gold scores to match changes, some large shifts occur (~10%) mainly in the spatial distribution as we have now moved back to composite grids REVERT: 6189cb0 added sig digits control to the dataset pages REVERT: b6af223 needs to be https REVERT: 488d019 fixed the all models page REVERT: 6a50632 fix for if no values are in the table REVERT: c37e8e0 Merge branch 'ncollier/revert-method' of bitbucket.org:ncollier/ilamb into ncollier/revert-method REVERT: 3e145a8 added category scoring REVERT: 1075b88 Merge branch 'master' into ncollier/revert-method REVERT: db145a5 forced all score limits to [0,1] REVERT: 8686802 removed dateline plotting which was causing issues REVERT: 0a13357 fixed defaults REVERT: 2d1ae6d fixed relationship pages REVERT: 6ff549b generalized main page, relationship page broken REVERT: 4bbd9d0 added a tropical biomass dataset to the ilamb canon REVERT: 744a16b first stab at reintegration REVERT: 3fe2e67 improvements to interpolation and cell areas REVERT: 12ac6c3 included accounting for overlapping areas REVERT: 0730ccd the definition of the global region was cause the halo of cells around the domain edges to become masked resulting in slightly lower integrated values than should be seen. 
Hard code this to never mask anything REVERT: 1ddb467 we are seeing odd behavior in basemap plotting, keeping the datum at the origin unless strictly necesary seems to help REVERT: a22630d initial rework git-subtree-dir: ilamb/ilamb git-subtree-split: c692c087d543a8ce67111f7cf860c681b300e3d5 --- README.rst | 41 +- bin/ilamb-run | 9 +- bin/ilamb-table | 65 --- demo/ilamb.cfg | 33 -- doc/install.rst | 4 +- setup.py | 10 +- src/ILAMB/ConfIOMB.py | 15 - src/ILAMB/ConfNBP.py | 46 +-- src/ILAMB/ConfPermafrost.py | 223 ----------- src/ILAMB/ConfRunoff.py | 44 +-- src/ILAMB/ConfTWSA.py | 243 +++++------- src/ILAMB/Confrontation.py | 494 ++++++++++------------- src/ILAMB/ModelResult.py | 5 +- src/ILAMB/Post.py | 409 +++++-------------- src/ILAMB/Regions.py | 5 +- src/ILAMB/Scoreboard.py | 67 +--- src/ILAMB/Variable.py | 154 ++++---- src/ILAMB/__init__.py | 6 +- src/ILAMB/constants.py | 22 -- src/ILAMB/ilamblib.py | 767 +++++++++++------------------------- test/scores_test.csv.gold | 16 +- 21 files changed, 801 insertions(+), 1877 deletions(-) delete mode 100644 bin/ilamb-table delete mode 100644 src/ILAMB/ConfPermafrost.py diff --git a/README.rst b/README.rst index ebbe75ee..86120b54 100644 --- a/README.rst +++ b/README.rst @@ -31,38 +31,27 @@ Useful Information * `CLM `_ - land comparison against 3 CLM versions and 2 forcings * `CMIP5 `_ - land comparison against a collection of CMIP5 models * `IOMB `_ - ocean comparison against a few ocean models - -* Paper `preprint `_ which - details the design and methodology employed in the ILAMB package + * If you find the package or the ouput helpful in your research or development efforts, we kindly ask you to cite the following reference (DOI:10.18139/ILAMB.v002.00/1251621). -ILAMB 2.3 Release +ILAMB 2.2 Release ----------------- -We are pleased to announce version 2.3 of the ILAMB python -package. Among many bugfixes and improvements we highlight these major -changes: - -* You may observe a large shift in some score values. In this version - we solidified our scoring methodology while writing a `paper - `_ which necesitated - reworking some of the scores. For details, see the linked paper. -* Made a memory optimization pass through the analysis routines. Peak - memory usage and the time at peak was reduced improving performance. -* Restructured the symbolic manipulation of derived variables to - greatly reduce the required memory. -* Moved from using cfunits to cf_units. Both are python wrappers - around the UDUNITS library, but cfunits is stagnant and placed a - lower limit to the version of the netCDF4 python wrappers we could - use. -* The scoring of the interannual variability was missed in the port - from version 1 to 2, we have added the metric. -* The terrestrial water storage anomaly GRACE metric was changed to - compare mean anomaly values over large river basins. For details see - the ILAMB paper. - +We are pleased to announce version 2.2 of the ILAMB python package. Among many small bugfixes and enhancements, the new version contains the following new features: + +* A new installed command ``ilamb-fetch`` has been included which can be run to automatically download the observational datasets. Running this command after the data has been downloaded will check your collection for updates and consistency. +* A new installed command ``ilamb-doctor`` has been included which can be run with options similar to ``ilamb-run`` to help identify which values a particular configure file needs in order to run. 
+* ILAMB will now check the spatial extents of all the models present in the current run and clip away to the largest shared extent. This allows ILAMB to be applied to regional models. +* User-defined regions can now be added at runtime either by specifying latitude/longitude bounds, or a mask in a netCDF4 file. For specifics, consult the regions `tutorial `_. +* Added a runoff and evaporative fraction benchmark to the ILAMB canon, removed the GFED3 and GFED4 burned area data products. +* Added many more plots to the generic output including the RMSE and the score maps. +* The ILAMB core has been enhanced to better handle depths. This has enabled ocean comparisons among others. +* An initial collection of ocean datasets has been assembled in the ``demo/iomb.cfg`` file for ocean benchmarking. +* The plotting phase of ``ilamb-run`` may now be skipped with the ``--skip_plots`` option. +* Relationship overall scores are now available in an image on the main html output page. +* Additional `tutorials `_ have been added to explain these new features. Funding ------- diff --git a/bin/ilamb-run b/bin/ilamb-run index 03f7b9d5..a98e05fd 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -378,7 +378,6 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) sys.stdout.flush() - comm.Barrier() for c in C: if not skip_plots: try: @@ -477,8 +476,7 @@ parser.add_argument('--model_setup', dest="model_setup", type=str, nargs='+',def help='list files model setup information') parser.add_argument('--skip_plots', dest="skip_plots", action="store_true", help='enable to skip the plotting phase') -parser.add_argument('--rel_only', dest="rel_only", action="store_true", - help='enable only display relative differences in overall scores') + args = parser.parse_args() if args.config is None: if rank == 0: @@ -507,8 +505,7 @@ S = Scoreboard(args.config[0], master = rank==0, verbose = not args.quiet, build_dir = args.build_dir[0], - extents = RestrictiveModelExtents(M), - rel_only = args.rel_only) + extents = RestrictiveModelExtents(M)) C = MatchRelationshipConfrontation(S.list()) Cf = FilterConfrontationList(C,args.confront) @@ -523,7 +520,7 @@ if args.logging: if rank == 0: logger.info(" " + " ".join(os.uname())) - for key in ["ILAMB","numpy","matplotlib","netCDF4","cf_units","sympy","mpi4py"]: + for key in ["ILAMB","numpy","matplotlib","netCDF4","cfunits","sympy","mpi4py"]: pkg = __import__(key) try: path = pkg.__path__[0] diff --git a/bin/ilamb-table b/bin/ilamb-table deleted file mode 100644 index 374b6abb..00000000 --- a/bin/ilamb-table +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python -""" -""" -from ILAMB.Scoreboard import Scoreboard -from netCDF4 import Dataset -import os,argparse,sys - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, - help='path to configuration file to use') -parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], - help='path of where to save the output') -parser.add_argument('--csv_file', dest="csv", metavar='csv', type=str, nargs=1,default=["table.csv"], - help='destination filename for the table') - -args = parser.parse_args() -if args.config is None: - print "\nError: You must specify a configuration file using the option --config\n" - sys.exit(1) - -S = Scoreboard(args.config[0],verbose=False,build_dir=args.build_dir[0]) - 
-region = "global" -scalar = "RMSE" -sname = "%s %s" % (scalar,region) -group = "MeanState" -table = {} -unit = {} -for c in S.list(): - for subdir, dirs, files in os.walk(c.output_path): - for fname in files: - if not fname.endswith(".nc"): continue - with Dataset(os.path.join(c.output_path,fname)) as dset: - if group not in dset.groups .keys(): continue - if "scalars" not in dset.groups[group].groups.keys(): continue - grp = dset.groups[group]["scalars"] - if sname not in grp.variables.keys(): continue - var = grp.variables[sname] - if not table.has_key(c.longname): - table[c.longname] = {} - unit [c.longname] = var.units - table[c.longname][dset.name] = var[...] - -# What models have data? -models = [] -for key in table.keys(): - for m in table[key].keys(): - if m not in models: models.append(m) -models.sort() - -# render a table of values in csv format -lines = ",".join(["Name","Units"] + models) -for c in S.list(): - if not table.has_key(c.longname): continue - line = "%s,%s" % (c.longname,unit[c.longname]) - for m in models: - if table[c.longname].has_key(m): - line += ",%g" % (table[c.longname][m]) - else: - line += "," - lines += "\n%s" % line - -with file(args.csv[0],mode="w") as f: - f.write(lines) - diff --git a/demo/ilamb.cfg b/demo/ilamb.cfg index 793227fd..56ae4a2b 100644 --- a/demo/ilamb.cfg +++ b/demo/ilamb.cfg @@ -23,13 +23,6 @@ table_unit = "Pg" plot_unit = "kg m-2" space_mean = False -[Tropical] -source = "DATA/biomass/Tropical/biomass_0.5x0.5.nc" -weight = 20 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False - [GlobalCarbon] source = "DATA/biomass/GLOBAL.CARBON/biomass_0.5x0.5.nc" weight = 16 @@ -284,7 +277,6 @@ skip_iav = True [h2: Terrestrial Water Storage Anomaly] variable = "twsa" alternate_vars = "tws" -derived = "pr-evspsbl-mrro" cmap = "Blues" weight = 5 ctype = "ConfTWSA" @@ -293,31 +285,6 @@ ctype = "ConfTWSA" source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" weight = 25 -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Snow Water Equivalent] -variable = "swe" -alternate_vars = "snw" -cmap = "Blues" -weight = 5 - -[CanSISE] -source = "DATA/swe/CanSISE/swe.nc" -weight = 25 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Permafrost] -variable = "tsl" - -[NSIDC] -ctype = "ConfPermafrost" -source = "DATA/permafrost/NSIDC/NSIDC_0.5x0.5.nc" -y0 = 1970. -yf = 2000. -Teps = 273.15 -dmax = 3.5 - ########################################################################### [h1: Radiation and Energy Cycle] diff --git a/doc/install.rst b/doc/install.rst index 84a85043..7ccfaaf9 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -25,7 +25,7 @@ include: * netCDF4_, a python/numpy interface to the netCDF C library (you must have the C library installed) * sympy_, a python library for symbolic mathematics * mpi4py_, a python wrapper around the MPI library (you must have a MPI implementation installed) -* cf_units_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) +* cfunits_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) I have designated that a few of these dependencies are python interfaces to C libraries and so the library must also be installed @@ -271,7 +271,7 @@ Next open the local copy of the file with a editor and search for .. _numpy: https://www.numpy.org/ .. _matplotlib: https://matplotlib.org/ .. _netCDF4: https://github.com/Unidata/netcdf4-python -.. 
_cf_units: https://github.com/SciTools/cf-units +.. _cfunits: https://bitbucket.org/cfpython/cfunits-python/ .. _basemap: https://github.com/matplotlib/basemap .. _sympy: https://www.sympy.org/ .. _mpi4py: https://pythonhosted.org/mpi4py/ diff --git a/setup.py b/setup.py index b189267f..e36e7ff2 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess import os -VERSION = '2.3' +VERSION = '2.2' def git_version(): """ @@ -97,12 +97,12 @@ def write_version_py(filename=os.path.join('src/ILAMB', 'generated_version.py')) keywords=['benchmarking','earth system modeling','climate modeling','model intercomparison'], packages=['ILAMB'], package_dir={'ILAMB' : 'src/ILAMB'}, - scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor','bin/ilamb-table'], - install_requires=['numpy>=1.11.0', + scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor'], + install_requires=['numpy>=1.9.2', 'matplotlib>=1.4.3', #'basemap>=1.0.7', # basemap is in pypi but broken, need to manually install - 'netCDF4>=1.1.4', - 'cf_units>=2.0.0', + 'netCDF4>=1.1.4,<=1.2.4', # upper limit is for cfunits + 'cfunits>=1.1.4', 'sympy>=0.7.6', 'mpi4py>=1.3.1', 'scipy>=0.9.0'] diff --git a/src/ILAMB/ConfIOMB.py b/src/ILAMB/ConfIOMB.py index 7de75a4f..5047b61a 100644 --- a/src/ILAMB/ConfIOMB.py +++ b/src/ILAMB/ConfIOMB.py @@ -183,21 +183,6 @@ def _profileScore(ref,com,region): if layered: obsout += [o2,o3,o4] modout += [m2,m3,m4,sp] - - # Unit conversions - def _convert(var,unit): - if type(var) == type({}): - for key in var.keys(): var[key].convert(unit) - else: - var.convert(unit) - table_unit = self.keywords.get("table_unit",None) - plot_unit = self.keywords.get("plot_unit" ,None) - if table_unit is not None: - for var in [op,mp,mb,mr]: - _convert(var,table_unit) - if plot_unit is not None: - for var in [o1,m1,d1,r1,o2,m2,o3,m3,o4,m4]: - _convert(var,plot_unit) # Dump to files def _write(out_vars,results): diff --git a/src/ILAMB/ConfNBP.py b/src/ILAMB/ConfNBP.py index 96e618e0..97a3b47d 100644 --- a/src/ILAMB/ConfNBP.py +++ b/src/ILAMB/ConfNBP.py @@ -144,25 +144,20 @@ def compositePlots(self): # get the HTML page page = [page for page in self.layout.pages if "MeanState" in page.name][0] - colors = {} - corr = {} - std = {} - accum = {} + colors = [] + corr = [] + std = [] for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + if "Benchmark" in fname: continue dataset = Dataset(fname) if "MeanState" not in dataset.groups: continue - dset = dataset.groups["MeanState"] - mname = dataset.getncattr("name") - colors[mname] = dataset.getncattr("color") + dset = dataset.groups["MeanState"] + colors.append(dataset.getncattr("color")) key = [v for v in dset.groups["scalars"].variables.keys() if ("Temporal Distribution Score" in v)] if len(key) > 0: - sds = dset.groups["scalars"].variables[key[0]] - corr[mname] = sds.R - std [mname] = sds.std - if "accumulate_of_nbp_over_global" in dset.variables.keys(): - accum[mname] = Variable(filename = fname, - variable_name = "accumulate_of_nbp_over_global", - groupname = "MeanState") + sds = dset.groups["scalars"].variables[key[0]] + corr.append(sds.getncattr("R" )) + std .append(sds.getncattr("std")) # temporal distribution Taylor plot if len(corr) > 0: @@ -172,27 +167,6 @@ def compositePlots(self): side = "TEMPORAL TAYLOR DIAGRAM", legend = False) fig = plt.figure(figsize=(6.0,6.0)) - keys = corr.keys() - post.TaylorDiagram(np.asarray([std [key] for key in keys]), - np.asarray([corr[key] for key in keys]), - 1.0,fig, - [colors[key] 
for key in keys]) + post.TaylorDiagram(np.asarray(std),np.asarray(corr),1.0,fig,colors) fig.savefig(os.path.join(self.output_path,"temporal_variance.png")) plt.close() - - - # composite annual cycle plot - if len(accum) > 1: - page.addFigure("Spatially integrated regional mean", - "compaccumulation", - "RNAME_compaccumulation.png", - side = "ACCUMULATION", - legend = False) - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - dy = 0.05*(self.limits["accumulate"]["global"]["max"]-self.limits["accumulate"]["global"]["min"]) - for key in accum: - accum[key].plot(ax,lw=2,color=colors[key],label=key, - vmin=self.limits["accumulate"]["global"]["min"]-dy, - vmax=self.limits["accumulate"]["global"]["max"]+dy) - fig.savefig(os.path.join(self.output_path,"global_compaccumulation.png" )) - plt.close() diff --git a/src/ILAMB/ConfPermafrost.py b/src/ILAMB/ConfPermafrost.py deleted file mode 100644 index 92c2ead4..00000000 --- a/src/ILAMB/ConfPermafrost.py +++ /dev/null @@ -1,223 +0,0 @@ -from Confrontation import Confrontation -from mpl_toolkits.basemap import Basemap -from Variable import Variable -from Post import ColorBar -import matplotlib.pyplot as plt -from netCDF4 import Dataset -import ilamblib as il -import numpy as np - -class ConfPermafrost(Confrontation): - - def __init__(self,**keywords): - - # Ugly, but this is how we call the Confrontation constructor - super(ConfPermafrost,self).__init__(**keywords) - - # Now we overwrite some things which are different here - self.layout - self.regions = ["global"] - self.layout.regions = self.regions - self.weight = { "Obs Score" : 1., - "Mod Score" : 1. } - for page in self.layout.pages: - page.setMetricPriority(["Total Area" , - "Overlap Area", - "Missed Area" , - "Excess Area" , - "Obs Score" , - "Mod Score" , - "Overall Score"]) - - def stageData(self,m): - - obs = Variable(filename = self.source, - variable_name = "permafrost_extent") - - # These parameters may be changed from the configure file - y0 = float(self.keywords.get("y0" ,1970.)) # [yr] beginning year to include in analysis - yf = float(self.keywords.get("yf" ,2000.)) # [yr] end year to include in analysis - dmax = float(self.keywords.get("dmax",3.5)) # [m] consider layers where depth in is the range [0,dmax] - Teps = float(self.keywords.get("Teps",273.15)) # [K] temperature below which we assume permafrost occurs - - t0 = (y0 -1850.)*365. - tf = (yf+1-1850.)*365. - mod = m.extractTimeSeries(self.variable, - initial_time = t0, - final_time = tf) - mod.trim(t = [t0 ,tf ], - lat = [obs.lat.min(),90 ], - d = [0 ,dmax]) - mod = mod.annualCycle() - Tmax = mod.data.max(axis=0) - table = np.zeros(Tmax.shape[-2:]) - table[...] 
= np.NAN - thaw = np.zeros(table.shape,dtype=bool) - for i in range(mod.depth_bnds.shape[0]-1,-1,-1): - thaw += (Tmax[i]>=Teps) - frozen = np.where((Tmax[i] 60: fsize = 10 + ax.set_ylabel(ylabel,fontsize=fsize) + ax.set_xlim(ind_min,ind_max) + ax.set_ylim(dep_min,dep_max) + short_name = "rel_%s" % ind_name + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) + plt.close() + + # add the figure to the HTML layout + if name == "Benchmark" and region == "global": + short_name = short_name.replace("global_","") + page.addFigure(c.longname, + "benchmark_" + short_name, + "Benchmark_RNAME_%s.png" % (short_name), + legend = False, + benchmark = False) + page.addFigure(c.longname, + short_name, + "MNAME_RNAME_%s.png" % (short_name), + legend = False, + benchmark = False) + + # determine the 1D relationship curves + bins = np.linspace(ind_min,ind_max,nbin+1) + delta = 0.1*(bins[1]-bins[0]) + inds = np.digitize(x,bins) + ids = np.unique(inds).clip(1,bins.size-1) + xb = [] + yb = [] + eb = [] + for i in ids: + yt = y[inds==i] + xi = 0.5 + xb.append(xi*bins[i-1]+(1.-xi)*bins[i]) + yb.append(yt.mean()) + try: + eb.append(yt.std()) # for some reason this fails sometimes + except: + eb.append(np.sqrt(((yt-yb[-1])**2).sum()/float(yt.size))) + + if name == "Benchmark": + obs_x = np.asarray(xb) + obs_y = np.asarray(yb) + obs_e = np.asarray(eb) + else: + mod_x = np.asarray(xb) + mod_y = np.asarray(yb) + mod_e = np.asarray(eb) + + # compute and plot the difference + O = np.array(obs_dist.data) + M = np.array(mod_dist.data) + O[np.where(obs_dist.mask)] = 0. + M[np.where(mod_dist.mask)] = 0. + dif_dist = np.ma.masked_array(M-O,mask=obs_dist.mask*mod_dist.mask) + lim = np.abs(dif_dist).max() + fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) + pc = ax.pcolormesh(xedges, + yedges, + dif_dist, + cmap = "Spectral_r", + vmin = -lim, + vmax = +lim) + div = make_axes_locatable(ax) + fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), + orientation="vertical", + label="Distribution Difference") + ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) + ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) + ax.set_xlim(ind_min,ind_max) + ax.set_ylim(dep_min,dep_max) + short_name = "rel_diff_%s" % ind_name + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) + plt.close() page.addFigure(c.longname, - "rel_diff_%s" % ind_name, - "MNAME_RNAME_rel_diff_%s.png" % ind_name, - legend = False, + short_name, + "MNAME_RNAME_%s.png" % (short_name), + legend = False, benchmark = False) + + # score the distributions = 1 - Hellinger distance + score = 1.-np.sqrt(((np.sqrt(obs_dist)-np.sqrt(mod_dist))**2).sum())/np.sqrt(2) + vname = '%s Score %s' % (c.longname.split('/')[0],region) + #if vname in scalars.variables: + # scalars.variables[vname][0] = score + #else: + # Variable(name = vname, + # unit = "1", + # data = score).toNetCDF4(results,group="Relationships") + + # plot the 1D curve + fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) + ax.errorbar(obs_x-delta,obs_y,yerr=obs_e,fmt='-o',color='k') + ax.errorbar(mod_x+delta,mod_y,yerr=mod_e,fmt='-o',color=m.color) + ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) + ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) + ax.set_xlim(ind_min,ind_max) + ax.set_ylim(dep_min,dep_max) + short_name = "rel_func_%s" % 
ind_name + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) + plt.close() page.addFigure(c.longname, - "rel_func_%s" % ind_name, - "MNAME_RNAME_rel_func_%s.png" % ind_name, - legend = False, + short_name, + "MNAME_RNAME_%s.png" % (short_name), + legend = False, benchmark = False) - - # Analysis over regions - lim_dep = [dep_min,dep_max] - lim_ind = [ind_min,ind_max] - longname = c.longname.split('/')[0] - for region in self.regions: - ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) - com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) - - # Make the plots - _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], - "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), - "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) - _plotDistribution(com_dist[0],com_dist[1],com_dist[2], - "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) - _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], - "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) - _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], - "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), - m.color, - os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) - - # Score the distribution - score = _scoreDistribution(ref_dist[0],com_dist[0]) - sname = "%s Hellinger Distance %s" % (longname,region) - if sname in scalars.variables: - scalars.variables[sname][0] = score - else: - Variable(name = sname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - # Score the functional response - score = _scoreFunction(ref_dist[3],com_dist[3]) - sname = "%s RMSE Score %s" % (longname,region) - if sname in scalars.variables: - scalars.variables[sname][0] = score - else: - Variable(name = sname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - + # score the relationship + i0,i1 = np.where(np.abs(obs_x[:,np.newaxis]-mod_x)<1e-12) + score = np.exp(-np.linalg.norm(obs_y[i0]-mod_y[i1])/np.linalg.norm(obs_y[i0])) + vname = '%s RMSE Score %s' % (c.longname.split('/')[0],region) + if vname in scalars.variables: + scalars.variables[vname][0] = score + else: + Variable(name = vname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") - + + results.close() class FileContextManager(): diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py index 6980d8b8..1aaf7838 100644 --- a/src/ILAMB/ModelResult.py +++ b/src/ILAMB/ModelResult.py @@ -138,10 +138,8 @@ def _getGridInformation(self): else: self.land_fraction = (Dataset(self.variables["sftlf"][0]).variables["sftlf"])[...] 
# some models represent the fraction as a percent - if np.ma.max(self.land_fraction) > 1: self.land_fraction *= 0.01 - np.seterr(over='ignore') + if np.ma.max(self.land_fraction) > 1: self.land_fraction *= 0.01 self.land_areas = self.cell_areas*self.land_fraction - np.seterr(over='warn') self.land_area = np.ma.sum(self.land_areas) return @@ -266,6 +264,7 @@ def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_ti """ from sympy import sympify + from cfunits import Units if expression is None: raise il.VarNotInModel() args = {} units = {} diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py index 809b154d..7b055a82 100644 --- a/src/ILAMB/Post.py +++ b/src/ILAMB/Post.py @@ -213,29 +213,20 @@ def __init__(self,name,title): self.regions = None self.metrics = None self.units = None - self.priority = ["original","Model","intersection","complement","Benchmark","Bias","RMSE","Phase","Seasonal","Spatial","Interannual","Score","Overall"] + self.priority = ["Bias","RMSE","Phase","Seasonal","Spatial","Interannual","Score","Overall"] self.header = "CNAME" self.sections = [] self.figures = {} self.text = None - self.inserts = [] def __str__(self): r = Regions() - def _sortFigures(figure): - macro = ["timeint","bias","rmse","iav","phase","shift","variance","spaceint","accumulate","cycle"] + def _sortFigures(figure,priority=["benchmark_timeint","timeint","timeintremap","bias","rmse","benchmark_phase","phase","shift","biasscore","rmsescore","shiftscore","spatial_variance","legend_spatial_variance","spaceint","accumulate","cycle","dtcycle","compcycle","temporal_variance"]): val = 1. - for i,m in enumerate(macro): - if m in figure.name: val += 3**i - if figure.name.startswith("benchmark"): val -= 1. - if figure.name.endswith("score"): val += 1. - if figure.name.startswith("legend"): - if "variance" in figure.name: - val += 1. - else: - val = 0. - return val + for i,pname in enumerate(priority): + if pname == figure.name: val += 2**i + return val code = """
    @@ -258,7 +249,7 @@ def _sortFigures(figure): if self.regions: code += """ - """ % (self.name,self.name) for region in self.regions: try: rname = r.getRegionName(region) @@ -271,21 +262,11 @@ def _sortFigures(figure): """ % (region,opts,rname) code += """ """ - - if self.models: - code += """ -
    - -
    """ - if self.metric_dict: code += self.metricsToHtmlTables() - + if self.metric_dict: + code += """ +
    """ % self.name + if self.text is not None: code += """ %s""" % self.text @@ -324,66 +305,6 @@ def addFigure(self,section,name,pattern,side=None,legend=False,benchmark=False,l def setMetricPriority(self,priority): self.priority = priority - def metricsToHtmlTables(self): - if not self.metric_dict: return "" - regions = self.regions - metrics = self.metrics - units = self.units - cname = self.cname.split(" / ") - if len(cname) == 3: - cname = cname[1].strip() - else: - cname = cname[-1].strip() - html = "" - inserts = self.inserts - j0 = 0 if "Benchmark" in self.models else -1 - score_sig = 2 # number of significant digits used in the score tables - other_sig = 3 # number of significant digits used for non-score quantities - for region in regions: - html += """ -
    - - - - - """ % (self.name,region) - for i,metric in enumerate(metrics): - if i in inserts: html += """ - """ - html += """ - """ % (metric,units[metric]) - html += """ - - - """ - - for j,model in enumerate(self.models): - opts = ' onclick="highlightRow%s(this)"' % (self.name) if j > j0 else '' - html += """ - - %s - [-]""" % (opts,model,opts,cname,model) - for i,metric in enumerate(metrics): - sig = score_sig if "score" in metric.lower() else other_sig - if i in inserts: html += """ - """ % (opts) - add = "" - try: - add = ("%#." + "%d" % sig + "g") % self.metric_dict[model][region][metric].data - add = add.lower().replace("nan","") - except: - pass - html += """ - %s""" % (opts,add) - html += """ - """ - html += """ - -
    Download Data
    %s [%s]
    -
    """ - - return html - def googleScript(self): if not self.metric_dict: return "" models = self.models @@ -395,130 +316,77 @@ def googleScript(self): cname = cname[1].strip() else: cname = cname[-1].strip() - - - - rows = "" - for section in self.sections: - for figure in self.figures[section]: - rows += figure.generateClickRow() - - head = """ - - function updateImagesAndHeaders%s(){ - var rsel = document.getElementById("%sRegion"); - var msel = document.getElementById("%sModel"); - var rid = rsel.selectedIndex; - var mid = msel.selectedIndex; - var RNAME = rsel.options[rid].value; - var MNAME = msel.options[mid].value; - var CNAME = "%s"; - var head = "%s"; - head = head.replace("CNAME",CNAME).replace("RNAME",RNAME).replace("MNAME",MNAME); - $("#%sHead").text(head); - %s - }""" % (self.name,self.name,self.name,self.cname,self.header,self.name,rows) - - nscores = len(metrics) - if len(self.inserts) > 0: nscores -= self.inserts[-1] - r0 = 2 if "Benchmark" in models else 1 - + callback = "%sTable()" % self.name + head = """ + function %sTable() { + var data = new google.visualization.DataTable(); + data.addColumn('string','Model'); + data.addColumn('string','Data');""" % (self.name) + for region in regions: + for metric in metrics: + head += """ + data.addColumn('number','%s [%s]');""" % (metric,metric,units[metric]) head += """ - - function highlightRow%s(cell) { - var select = document.getElementById("%sRegion"); - for (var i = 0; i < select.length; i++){ - var table = document.getElementById("%s_table_" + select.options[i].value); - var rows = table.getElementsByTagName("tr"); - for (var r = %d; r < rows.length; r++) { - for (var c = 0; c < rows[r].cells.length-%d; c++) { - rows[r].cells[c].style.backgroundColor = "#ffffff"; - } - } - var r = cell.closest("tr").rowIndex; - document.getElementById("%sModel").selectedIndex = r-1; - for (var c = 0; c < rows[r].cells.length-%d; c++) { - rows[r].cells[c].style.backgroundColor = "#c1c1c1"; - } - } - updateImagesAndHeaders%s(); - }""" % (self.name,self.name,self.name,r0,nscores+1,self.name,nscores+1,self.name) - + data.addRows([""" + for model in models: + head += """ + ['%s','[-]'""" % (model,cname,model) + for region in regions: + for metric in metrics: + add = ", null" + try: + add = ",%.03f" % self.metric_dict[model][region][metric].data + add = add.lower().replace("nan","null") + except: + pass + head += add + head += "]," head += """ + ]);""" - function paintScoreCells%s(RNAME) { - var colors = ['#fb6a4a','#fc9272','#fcbba1','#fee0d2','#fff5f0','#f7fcf5','#e5f5e0','#c7e9c0','#a1d99b','#74c476']; - var table = document.getElementById("%s_table_" + RNAME); - var rows = table.getElementsByTagName("tr"); - for (var c = rows[0].cells.length-%d; c < rows[0].cells.length; c++) { - var scores = []; - for (var r = %d; r < rows.length; r++) { - val = rows[r].cells[c].innerHTML; - if (val=="") { - scores[r-%d] = 0; - }else{ - scores[r-%d] = parseFloat(val); - } - } - var mean = math.mean(scores); - var std = math.max(0.02,math.std(scores)); - for (var r = %d; r < rows.length; r++) { - scores[r-%d] = (scores[r-%d]-mean)/std; - } - var smax = math.max(scores); - var smin = math.min(scores); - if (math.abs(smax-smin) < 1e-12) { - smin = -1.0; - smax = 1.0; - } - for (var r = %d; r < rows.length; r++) { - var clr = math.round((scores[r-%d]-smin)/(smax-smin)*10); - clr = math.min(9,math.max(0,clr)); - rows[r].cells[c].style.backgroundColor = colors[clr]; - } - } - }""" % (self.name,self.name,nscores,r0,r0,r0,r0,r0,r0,r0,r0) + n = len(metrics) + cols = 
(str(range(2,n+2))[1:]).replace(", ",", %d*rid+" % n) + cols = "%d*rid+2" % n + cols[1:] + head += """ + var view = new google.visualization.DataView(data); + var rid = document.getElementById("%sRegion").selectedIndex + view.setColumns([0,1,%s);""" % (self.name,cols) head += """ + var table = new google.visualization.Table(document.getElementById('%s_table')); + table.draw(view, {showRowNumber: false,allowHtml: true});""" % self.name - function pageLoad%s() { - var select = document.getElementById("%sRegion"); - var region = getQueryVariable("region"); - var model = getQueryVariable("model"); - if (region) { - for (var i = 0; i < select.length; i++){ - if (select.options[i].value == region) select.selectedIndex = i; - } - } - var table = document.getElementById("%s_table_" + select.options[select.selectedIndex].value); - var rows = table.getElementsByTagName("tr"); - if (model) { - for (var r = 0; r < rows.length; r++) { - if(rows[r].cells[0].innerHTML==model) highlightRow%s(rows[r].cells[0]); - } - }else{ - highlightRow%s(rows[%d]); - } - for (var i = 0; i < select.length; i++){ - paintScoreCells%s(select.options[i].value); - } - changeRegion%s(); - } - - function changeRegion%s() { - var select = document.getElementById("%sRegion"); - for (var i = 0; i < select.length; i++){ - RNAME = select.options[i].value; - if (i == select.selectedIndex) { - document.getElementById("%s_table_" + RNAME).style.display = "table"; - }else{ - document.getElementById("%s_table_" + RNAME).style.display = "none"; - } - } - updateImagesAndHeaders%s(); - }""" % (self.name,self.name,self.name,self.name,self.name,r0,self.name,self.name,self.name,self.name,self.name,self.name,self.name) - - return head,"pageLoad%s" % self.name,"" + head += """ + function clickRow() { + var header = "%s"; + var CNAME = "%s"; + header = header.replace("CNAME",CNAME); + var rid = document.getElementById("%s").selectedIndex; + var RNAME = document.getElementById("%s").options[rid].value; + header = header.replace("RNAME",RNAME); + var select = table.getSelection() + row = select[0].row;""" % (self.header,self.cname,self.name+"Region",self.name+"Region") + if "Benchmark" in models: + head += """ + if (row == 0) { + table.setSelection([{'row': 1}]); + clickRow(); + return; + }""" + head += """ + var MNAME = data.getValue(row,0); + header = header.replace("MNAME",MNAME); + $("#%sHead").text(header);""" % (self.name) + for section in self.sections: + for figure in self.figures[section]: + head += figure.generateClickRow() + head += """ + } + google.visualization.events.addListener(table, 'select', clickRow); + table.setSelection([{'row': 0}]); + clickRow(); + }""" + return head,callback,"table" def setRegions(self,regions): assert type(regions) == type([]) @@ -556,11 +424,6 @@ def _sortMetrics(name,priority=self.priority): self.metrics = metrics self.units = units - tmp = [("bias" in m.lower()) for m in metrics] - if tmp.count(True) > 0: self.inserts.append(tmp.index(True)) - tmp = [("score" in m.lower()) for m in metrics] - if tmp.count(True) > 0: self.inserts.append(tmp.index(True)) - def head(self): return "" @@ -643,7 +506,6 @@ def __str__(self): elif plot.longname is not None: name = plot.longname if "rel_" in plot.name: name = plot.name.replace("rel_","Relationship with ") - if name == "": continue opts = '' if plot.name == "timeint" or len(self.plots) == 1: opts = ' selected="selected"' @@ -652,15 +514,12 @@ def __str__(self): code += """ """ - fig = self.plots[0] - rem_side = fig.side - fig.side = "MNAME" - rem_leg = fig.legend 
- fig.legend = True - img = "%s" % (fig) - img = img.replace('"leg"','"MNAME_legend"').replace("%s" % fig.name,"MNAME") - fig.side = rem_side - fig.legend = rem_leg + fig = self.plots[0] + rem_side = fig.side + fig.side = "MNAME" + img = "%s" % (fig) + img = img.replace('"leg"','"MNAME_legend"').replace("%s" % fig.name,"MNAME") + fig.side = rem_side for model in self.pages[0].models: code += img.replace("MNAME",model) @@ -673,8 +532,7 @@ def __str__(self): return code def googleScript(self): - head = self.head() - return head,"","" + return "","","" def head(self): @@ -687,6 +545,7 @@ def head(self): except: pass head = """ + + """ return head class HtmlSitePlotsPage(HtmlPage): @@ -842,14 +703,11 @@ def head(self): class HtmlLayout(): def __init__(self,pages,cname,years=None): - + self.pages = pages self.cname = cname.replace("/"," / ") if years is not None: - try: - self.cname += " / %d-%d" % (years) - except: - pass + self.cname += " / %d-%d" % (years) for page in self.pages: page.pages = self.pages page.cname = self.cname @@ -863,19 +721,9 @@ def __str__(self): - - """ - + """ + + ### stick in javascript stuff here functions = [] callbacks = [] packages = [] @@ -886,25 +734,25 @@ def __str__(self): if f != "": functions.append(f) if c != "": callbacks.append(c) if p != "": packages.append(p) - + code += """ """ - - code += """ - """ - max_height = 280 # will be related to max column header length across all pages + for page in self.pages: code += page.head() + + ### stick in css stuff here code += """ """ % (max_height,max_height/2-5) + """ code += """ - """ + """ ### loop over pages for page in self.pages: code += "%s" % (page) @@ -1057,15 +874,8 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa # define some parameters nmodels = len(models) nvariables = len(variables) - maxV = max([len(v) for v in variables]) - maxM = max([len(m) for m in models]) - wpchar = 0.15 - wpcell = 0.19 - hpcell = 0.25 - w = maxV*wpchar + max(4,nmodels)*wpcell - if not rel_only: w += (max(4,nmodels)+1)*wpcell - h = maxM*wpchar + nvariables*hpcell + 1.0 - + w = max((nmodels-3.)/(14.-3.)*(9.5-5.08)+5.08,7.) # heuristic for figure size + h = 8. 
bad = 0.5 if "stoplight" not in plt.colormaps(): RegisterCustomColormaps() @@ -1087,7 +897,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa format="%g", cax=div.append_axes("bottom", size="5%", pad=0.05), orientation="horizontal", - label="Absolute Score") + label="Variable Score") plt.tick_params(which='both', length=0) ax[0].xaxis.tick_top() ax[0].set_xticks (np.arange(nmodels )+0.5) @@ -1095,9 +905,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[0].set_yticks (np.arange(nvariables)+0.5) ax[0].set_yticklabels(variables[::-1]) ax[0].tick_params('both',length=0,width=0,which='major') - ax[0].tick_params(axis='y',pad=10) - ax[0].set_xlim(0,nmodels) - ax[0].set_ylim(0,nvariables) + ax[0].tick_params(axis='y', pad=10) if vcolor is not None: for i,t in enumerate(ax[0].yaxis.get_ticklabels()): t.set_backgroundcolor(vcolor[::-1][i]) @@ -1123,14 +931,13 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa format="%+d", cax=div.append_axes("bottom", size="5%", pad=0.05), orientation="horizontal", - label="Relative Score") + label="Variable Z-score") plt.tick_params(which='both', length=0) ax[i].xaxis.tick_top() ax[i].set_xticks(np.arange(nmodels)+0.5) ax[i].set_xticklabels(models,rotation=90) ax[i].tick_params('both',length=0,width=0,which='major') ax[i].set_yticks([]) - ax[i].set_xlim(0,nmodels) ax[i].set_ylim(0,nvariables) if rel_only: ax[i].set_yticks (np.arange(nvariables)+0.5) diff --git a/src/ILAMB/Regions.py b/src/ILAMB/Regions.py index 853123e6..704207f6 100644 --- a/src/ILAMB/Regions.py +++ b/src/ILAMB/Regions.py @@ -174,9 +174,8 @@ def hasData(self,label,var): # Populate some regions r = Regions() - r.addRegionLatLonBounds("global","Globe",(-89.999, 89.999),(-179.999, 179.999)) - Regions._regions["global"][3][...] = 0. 
# ensure global mask is null - + r.addRegionLatLonBounds("global","Globe",(-89.75, 89.75),(-179.75, 179.75)) + # GFED regions r.addRegionLatLonBounds("bona","Boreal North America", ( 49.75, 79.75),(-170.25,- 60.25)) r.addRegionLatLonBounds("tena","Temperate North America", ( 30.25, 49.75),(-125.25,- 66.25)) diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py index 5fceb929..502a03ab 100644 --- a/src/ILAMB/Scoreboard.py +++ b/src/ILAMB/Scoreboard.py @@ -5,7 +5,6 @@ from ConfEvapFraction import ConfEvapFraction from ConfIOMB import ConfIOMB from ConfDiurnal import ConfDiurnal -from ConfPermafrost import ConfPermafrost import os,re from netCDF4 import Dataset import numpy as np @@ -180,19 +179,17 @@ def ParseScoreboardConfigureFile(filename): "ConfRunoff" : ConfRunoff, "ConfEvapFraction": ConfEvapFraction, "ConfIOMB" : ConfIOMB, - "ConfDiurnal" : ConfDiurnal, - "ConfPermafrost" : ConfPermafrost} + "ConfDiurnal" : ConfDiurnal} class Scoreboard(): """ A class for managing confrontations """ - def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None,rel_only=False): + def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None): if not os.environ.has_key('ILAMB_ROOT'): raise ValueError("You must set the environment variable 'ILAMB_ROOT'") self.build_dir = build_dir - self.rel_only = rel_only if (master and not os.path.isdir(self.build_dir)): os.mkdir(self.build_dir) @@ -275,7 +272,7 @@ def createHtml(self,M,filename="index.html"): has_rel = np.asarray([len(rel.children) for rel in rel_tree.children]).sum() > 0 nav = "" if has_rel: - GenerateRelSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir,rel_only=self.rel_only) + GenerateSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir) nav = """
  • Relationship
  • """ #global global_print_node_string @@ -415,8 +412,7 @@ def createHtml(self,M,filename="index.html"): """ - html += GenerateTable(self.tree,M,self) - + for tree in self.tree.children: html += GenerateTable(tree,M,self) html += """ @@ -437,7 +433,7 @@ def createHtml(self,M,filename="index.html"): """ - html += GenerateTable(rel_tree,M,self,composite=False) + for tree in rel_tree.children: html += GenerateTable(tree,M,self,composite=False) html += """ @@ -456,7 +452,7 @@ def createBarCharts(self,M): html = GenerateBarCharts(self.tree,M) def createSummaryFigure(self,M): - GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir,rel_only=self.rel_only) + GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir) def dumpScores(self,M,filename): out = file("%s/%s" % (self.build_dir,filename),"w") @@ -525,7 +521,6 @@ def rgb_to_hex(rgb): def BuildHTMLTable(tree,M,build_dir): global global_model_list global_model_list = M - global global_table_color def _genHTML(node): global global_html global global_table_color @@ -565,27 +560,20 @@ def _genHTML(node): row += '
    ' global_html += row - for cat in tree.children: - global_table_color = cat.bgcolor - for var in cat.children: - TraversePreorder(var,_genHTML) - cat.name += " Summary" - _genHTML(cat) - cat.name.replace(" Summary","") - global_table_color = tree.bgcolor - tree.name = "Overall Summary" - _genHTML(tree) + TraversePreorder(tree,_genHTML) def GenerateTable(tree,M,S,composite=True): global global_html global global_model_list + global global_table_color if composite: CompositeScores(tree,M) global_model_list = M + global_table_color = tree.bgcolor global_html = "" - BuildHTMLTable(tree,M,S.build_dir) + for cat in tree.children: BuildHTMLTable(cat,M,S.build_dir) return global_html -def GenerateSummaryFigure(tree,M,filename,rel_only=False): +def GenerateSummaryFigure(tree,M,filename): models = [m.name for m in M] variables = [] @@ -605,35 +593,9 @@ def GenerateSummaryFigure(tree,M,filename,rel_only=False): else: data[row,:] = var.score - BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors,rel_only=rel_only) - -def GenerateRelSummaryFigure(S,M,figname,rel_only=False): - - # reorganize the relationship data - scores = {} - counts = {} - rows = [] - vcolors = [] - for h1 in S.children: - for dep in h1.children: - dname = dep.name.split("/")[0] - for ind in dep.children: - iname = ind.name.split("/")[0] - key = "%s/%s" % (dname,iname) - if scores.has_key(key): - scores[key] += ind.score - counts[key] += 1. - else: - scores[key] = np.copy(ind.score) - counts[key] = 1. - rows .append(key) - vcolors.append(h1.bgcolor) - if len(rows) == 0: return - data = np.ma.zeros((len(rows),len(M))) - for i,row in enumerate(rows): - data[i,:] = scores[row] / counts[row] - BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) - + BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors) + + def GenerateRelationshipTree(S,M): # Create a tree which mimics the scoreboard for relationships, but @@ -694,6 +656,7 @@ def GenerateRelationshipTree(S,M): if "Overall Score global" not in grp.variables.keys(): continue h2.score[i] = grp.variables["Overall Score global"][...] + return rel_tree diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index fcfa33f9..18ad3c80 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -3,7 +3,7 @@ from mpl_toolkits.basemap import Basemap import matplotlib.colors as colors from pylab import get_cmap -from cf_units import Unit +from cfunits import Units import ilamblib as il import Post as post import numpy as np @@ -220,17 +220,6 @@ def __str__(self): return s - def nbytes(self): - r"""Estimate the memory usage of a variable in bytes. - """ - nbytes = 0. - for key in self.__dict__.keys(): - try: - nbytes += self.__dict__[key].nbytes - except: - pass - return nbytes - def integrateInTime(self,**keywords): r"""Integrates the variable over a given time period. 
@@ -297,7 +286,7 @@ def integrateInTime(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Unit(self.unit) + unit = Units(self.unit) name = self.name + "_integrated_over_time" if mean: @@ -311,18 +300,18 @@ def integrateInTime(self,**keywords): else: dt = dt.sum(axis=0) np.seterr(over='ignore',under='ignore') - integral = integral / dt + integral /= dt np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Unit("d")*unit - unit = Unit(unit0.format().split()[-1]) - integral = unit0.convert(integral,unit) + unit0 = Units("d")*unit + unit = Units(unit0.formatted().split()[-1]) + integral = Units.conform(integral,unit0,unit) return Variable(data = integral, - unit = "%s" % unit, + unit = unit.units, name = name, lat = self.lat, lat_bnds = self.lat_bnds, @@ -414,7 +403,7 @@ def integrateInDepth(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Unit(self.unit) + unit = Units(self.unit) name = self.name + "_integrated_over_depth" if mean: @@ -428,18 +417,18 @@ def integrateInDepth(self,**keywords): else: dz = dz.sum(axis=axis) np.seterr(over='ignore',under='ignore') - integral = integral / dz + integral /= dz np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Unit("m")*unit - unit = Unit(unit0.format().split()[-1]) - integral = unit0.convert(integral,unit) + unit0 = Units("m")*unit + unit = Units(unit0.formatted().split()[-1]) + integral = Units.conform(integral,unit0,unit) return Variable(data = integral, - unit = "%s" % unit, + unit = unit.units, name = name, time = self.time, time_bnds = self.time_bnds, @@ -532,13 +521,13 @@ def _integrate(var,areas): integral = _integrate(self.data,measure) if mean: np.seterr(under='ignore') - integral = integral / measure.sum() + integral /= measure.sum() np.seterr(under='raise') # handle the name and unit name = self.name + "_integrated_over_space" if region is not None: name = name.replace("space",region) - unit = Unit(self.unit) + unit = Units(self.unit) if mean: # we have already divided thru by the non-masked area in @@ -547,12 +536,12 @@ def _integrate(var,areas): else: # if not a mean, we need to potentially handle unit conversions - unit0 = Unit("m2")*unit - unit = Unit(unit0.format().split()[-1]) - integral = unit0.convert(integral,unit) + unit0 = Units("m2")*unit + unit = Units(unit0.formatted().split()[-1]) + integral = Units.conform(integral,unit0,unit) return Variable(data = np.ma.masked_array(integral), - unit = "%s" % unit, + unit = unit.units, time = self.time, time_bnds = self.time_bnds, depth = self.depth, @@ -721,7 +710,7 @@ def _make_bnds(x): bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) return bnds - assert Unit(var.unit) == Unit(self.unit) + assert Units(var.unit) == Units(self.unit) assert self.temporal == False assert self.ndata == var.ndata assert self.layered == False @@ -763,7 +752,7 @@ def _make_bnds(x): def convert(self,unit,density=998.2): """Convert the variable to a given unit. - We use the UDUNITS library via the cf_units python interface to + We use the UDUNITS library via the cfunits python interface to convert the variable's unit. Additional support is provided for unit conversions in which substance information is required. 
For example, in quantities such as precipitation it @@ -788,53 +777,53 @@ def convert(self,unit,density=998.2): this object with its unit converted """ - if unit is None: return self - src_unit = Unit(self.unit) - tar_unit = Unit( unit) + src_unit = Units(self.unit) + tar_unit = Units( unit) mask = self.data.mask # Define some generic quantities - linear = Unit("m") - linear_rate = Unit("m s-1") - area_density = Unit("kg m-2") - area_density_rate = Unit("kg m-2 s-1") - mass_density = Unit("kg m-3") - volume_conc = Unit("mol m-3") - mass_conc = Unit("mol kg-1") - - # UDUNITS doesn't handle frequently found temperature expressions + linear = Units("m") + linear_rate = Units("m s-1") + area_density = Units("kg m-2") + area_density_rate = Units("kg m-2 s-1") + mass_density = Units("kg m-3") + volume_conc = Units("mol m-3") + mass_conc = Units("mol kg-1") + + # cfunits doesn't handle frequently found temperature expressions synonyms = {"K":"degK", "R":"degR", "C":"degC", "F":"degF"} for syn in synonyms.keys(): - if src_unit.format() == syn: src_unit = Unit(synonyms[syn]) - if tar_unit.format() == syn: tar_unit = Unit(synonyms[syn]) + if src_unit.units == syn: src_unit = Units(synonyms[syn]) + if tar_unit.units == syn: tar_unit = Units(synonyms[syn]) # Do we need to multiply by density? - if ( (src_unit.is_convertible(linear_rate) and tar_unit.is_convertible(area_density_rate)) or - (src_unit.is_convertible(linear ) and tar_unit.is_convertible(area_density )) or - (src_unit.is_convertible(mass_conc ) and tar_unit.is_convertible(volume_conc )) ): + if ( (src_unit.equivalent(linear_rate) and tar_unit.equivalent(area_density_rate)) or + (src_unit.equivalent(linear ) and tar_unit.equivalent(area_density )) or + (src_unit.equivalent(mass_conc ) and tar_unit.equivalent(volume_conc )) ): np.seterr(over='ignore',under='ignore') self.data *= density np.seterr(over='raise',under='raise') src_unit *= mass_density # Do we need to divide by density? 
- if ( (tar_unit.is_convertible(linear_rate) and src_unit.is_convertible(area_density_rate)) or - (tar_unit.is_convertible(linear ) and src_unit.is_convertible(area_density )) or - (tar_unit.is_convertible(mass_conc ) and src_unit.is_convertible(volume_conc )) ): + if ( (tar_unit.equivalent(linear_rate) and src_unit.equivalent(area_density_rate)) or + (tar_unit.equivalent(linear ) and src_unit.equivalent(area_density )) or + (tar_unit.equivalent(mass_conc ) and src_unit.equivalent(volume_conc )) ): np.seterr(over='ignore',under='ignore') - self.data = self.data / density + self.data /= density np.seterr(over='raise',under='raise') - src_unit = src_unit / mass_density + src_unit /= mass_density # Convert units try: - self.data = src_unit.convert(self.data,tar_unit) + self.data = Units.conform(self.data,src_unit,tar_unit) self.data = np.ma.masked_array(self.data,mask=mask) self.unit = unit except: + print "var_name = %s, src_unit = %s, target_unit = %s " % (self.name,src_unit,tar_unit) raise il.UnitConversionError() return self @@ -1087,8 +1076,7 @@ def plot(self,ax,**keywords): land = keywords.get("land" ,0.875) water = keywords.get("water" ,0.750) pad = keywords.get("pad" ,5.0) - cbar = keywords.get("cbar" ,False) - + rem_mask = None r = Regions() if self.temporal and not self.spatial: @@ -1111,7 +1099,7 @@ def plot(self,ax,**keywords): # Mask out areas outside our region rem_mask = np.copy(self.data.mask) self.data.mask += r.getMask(region,self) - + # Find the figure geometry if self.ndata: LAT = np.ma.masked_array(self.lat,mask=self.data.mask,copy=True) @@ -1121,19 +1109,38 @@ def plot(self,ax,**keywords): LAT,LON = np.meshgrid(self.lat,self.lon,indexing='ij') LAT = np.ma.masked_array(LAT,mask=self.data.mask,copy=False) LON = np.ma.masked_array(LON,mask=self.data.mask,copy=False) + LAT = self.lat[(LAT.mask==False).any(axis=1)] + TF = (LON.mask==False).any(axis=0) + # do we need to shift longitudes to plot continuously + # over the dateline? + dateline = True if (TF[0] == TF[-1] == True and + (TF==False).any() and + LAT.min() < -45. and + LAT.max() > 45. ) else False + LON = self.lon[TF] + if dateline: LON = (LON>=0)*LON+(LON<0)*(LON+360) lat0 = LAT.min() ; latf = LAT.max() lon0 = LON.min() ; lonf = LON.max() latm = LAT.mean(); lonm = LON.mean() - area = (latf-lat0)*(lonf-lon0) - + if dateline: + LON = (LON <=180)*LON +(LON >180)*(LON -360) + lon0 = (lon0<=180)*lon0+(lon0>180)*(lon0-360) + lonf = (lonf<=180)*lonf+(lonf>180)*(lonf-360) + lonm = (lonm<=180)*lonm+(lonm>180)*(lonm-360) + area = (latf-lat0) + if dateline: + area *= (360-lonf+lon0) + else: + area *= (lonf-lon0) + # Setup the plot projection depending on data limits - bmap = Basemap(projection = 'robin', - lon_0 = 0, - ax = ax, - resolution = 'c') + bmap = Basemap(projection = 'robin', + lon_0 = lonm, + ax = ax, + resolution = 'c') if (lon0 < -170.) and (lonf > 170.): - if lat0 > 23.5: + if lat0 > 23.5: bmap = Basemap(projection = 'npstere', boundinglat = lat0-5., lon_0 = 0., @@ -1146,7 +1153,7 @@ def plot(self,ax,**keywords): ax = ax, resolution = 'c') else: - if area < 10000.: + if area < 10000. 
and not dateline: bmap = Basemap(projection = 'cyl', llcrnrlon = lon0-2*pad, llcrnrlat = lat0- pad, @@ -1176,14 +1183,11 @@ def plot(self,ax,**keywords): clrs = clmp(norm) size = 35 ax = bmap.scatter(x,y,s=size,color=clrs,ax=ax,linewidths=0,cmap=cmap) - if cbar: - cb = bmap.colorbar(ax,location='bottom',pad="5%") - if label is not None: cb.set_label(label) if rem_mask is not None: self.data.mask = rem_mask return ax - def interpolate(self,time=None,lat=None,lon=None,lat_bnds=None,lon_bnds=None,itype='nearestneighbor'): + def interpolate(self,time=None,lat=None,lon=None,itype='nearestneighbor'): """Use nearest-neighbor interpolation to interpolate time and/or space at given values. Parameters @@ -1212,8 +1216,8 @@ def interpolate(self,time=None,lat=None,lon=None,lat_bnds=None,lon_bnds=None,ity if lat is None: lat = self.lat if lon is None: lon = self.lon if itype == 'nearestneighbor': - rows = (np.abs(lat[:,np.newaxis]-self.lat)).argmin(axis=1) - cols = (np.abs(lon[:,np.newaxis]-self.lon)).argmin(axis=1) + rows = np.apply_along_axis(np.argmin,1,np.abs(lat[:,np.newaxis]-self.lat)) + cols = np.apply_along_axis(np.argmin,1,np.abs(lon[:,np.newaxis]-self.lon)) args = [] if self.temporal: args.append(range(self.time.size)) if self.layered: args.append(range(self.depth.size)) @@ -1223,10 +1227,10 @@ def interpolate(self,time=None,lat=None,lon=None,lat_bnds=None,lon_bnds=None,ity mask = data.mask[ind] data = data.data[ind] data = np.ma.masked_array(data,mask=mask) - frac = self.area / il.CellAreas(self.lat,self.lon,self.lat_bnds,self.lon_bnds).clip(1e-12) + frac = self.area / il.CellAreas(self.lat,self.lon).clip(1e-12) frac = frac.clip(0,1) frac = frac[np.ix_(rows,cols)] - output_area = frac * il.CellAreas(lat,lon,lat_bnds,lon_bnds) + output_area = frac * il.CellAreas(lat,lon) elif itype == 'bilinear': from scipy.interpolate import RectBivariateSpline if self.data.ndim == 3: @@ -1610,7 +1614,7 @@ def spatialDistribution(self,var,region="global"): R0 = 1.0 std0 = std0.clip(1e-12) std = std .clip(1e-12) - std = std/std0 + std /= std0 score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) except: std = np.asarray([0.0]) diff --git a/src/ILAMB/__init__.py b/src/ILAMB/__init__.py index 3bb5124a..f8ea82b9 100644 --- a/src/ILAMB/__init__.py +++ b/src/ILAMB/__init__.py @@ -1,6 +1,6 @@ __author__ = 'Nathan Collier' -__date__ = 'Jun 2018' -__version__ = '2.3' +__date__ = 'Nov 2017' +__version__ = '2.2' from distutils.version import LooseVersion import platform @@ -10,7 +10,7 @@ "numpy" : "1.9.2", "matplotlib" : "1.4.3", "netCDF4" : "1.1.4", - "cf_units" : "2.0.0", + "cfunits" : "1.1.4", "mpl_toolkits.basemap" : "1.0.7", "sympy" : "0.7.6", "mpi4py" : "1.3.1" diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py index 8d77b8f6..714209d8 100644 --- a/src/ILAMB/constants.py +++ b/src/ILAMB/constants.py @@ -130,28 +130,6 @@ "sidelbl" :"RMSE SCORE", "haslegend" :True } -space_opts["iav"] = { "name" :"Interannual variability", - "cmap" :"Reds", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_iav.png", - "sidelbl" :"MODEL INTERANNUAL VARIABILITY", - "haslegend" :True } - -space_opts["iavscore"] = { "name" :"Interannual variability score", - "cmap" :"RdYlGn", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_iavscore.png", - "sidelbl" :"INTERANNUAL VARIABILITY SCORE", - "haslegend" :True } - space_opts["shift"] = 
{ "name" :"Temporally integrated mean phase shift", "cmap" :"PRGn", "sym" :True, diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index b7cff096..7ce35304 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -3,11 +3,11 @@ from Regions import Regions from netCDF4 import Dataset,num2date,date2num from datetime import datetime -from cf_units import Unit +from cfunits import Units from copy import deepcopy from mpi4py import MPI import numpy as np -import logging,re +import logging logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) @@ -53,30 +53,7 @@ def __str__(self): return "NotLayeredVariable" class NotDatasiteVariable(Exception): def __str__(self): return "NotDatasiteVariable" -def FixDumbUnits(unit): - r"""Try to fix the dumb units people insist on using. - Parameters - ---------- - unit : str - the trial unit - - Returns - ------- - unit : str - the fixed unit - """ - # Various synonyms for 1 - if unit.lower().strip() in ["unitless", - "n/a", - "none"]: unit = "1" - # Remove the C which so often is used to mean carbon but actually means coulomb - tokens = re.findall(r"[\w']+", unit) - for token in tokens: - if token.endswith("C") and Unit(token[:-1]).is_convertible(Unit("g")): - unit = unit.replace(token,token[:-1]) - return unit - def GenerateDistinctColors(N,saturation=0.67,value=0.67): r"""Generates a series of distinct colors. @@ -109,7 +86,7 @@ def ConvertCalendar(t,tbnd=None): This routine converts the representation of time to the ILAMB default: days since 1850-1-1 00:00:00 on a 365-day calendar. This is so we can make comparisons with data from other models and - benchmarks. + benchmarks. We use cfunits time conversion capability. Parameters ---------- @@ -196,7 +173,7 @@ def _dpyShift(tmid,ta,dpy): t = ta.mean(axis=1) return t,ta -def CellAreas(lat,lon,lat_bnds=None,lon_bnds=None): +def CellAreas(lat,lon): """Given arrays of latitude and longitude, return cell areas in square meters. Parameters @@ -213,11 +190,6 @@ def CellAreas(lat,lon,lat_bnds=None,lon_bnds=None): """ from constants import earth_rad - if (lat_bnds is not None and lon_bnds is not None): - return earth_rad**2*np.outer((np.sin(lat_bnds[:,1]*np.pi/180.)- - np.sin(lat_bnds[:,0]*np.pi/180.)), - (lon_bnds[:,1]-lon_bnds[:,0])*np.pi/180.) - x = np.zeros(lon.size+1) x[1:-1] = 0.5*(lon[1:]+lon[:-1]) x[ 0] = lon[ 0]-0.5*(lon[ 1]-lon[ 0]) @@ -366,51 +338,71 @@ def SympifyWithArgsUnits(expression,args,units): """ from sympy import sympify,postorder_traversal - expression = sympify(expression) - - # try to convert all arguments to same units if possible, it - # catches most use cases + # The traversal needs that we make units commensurate when + # possible keys = args.keys() - for i,key0 in enumerate(keys): - for key in keys[(i+1):]: - try: - Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True) - units[key] = units[key0] - except: - pass - + for i in range(len(keys)): + ikey = keys[i] + for j in range(i+1,len(keys)): + jkey = keys[j] + if Units(units[jkey]).equivalent(Units(units[ikey])): + args [jkey] = Units.conform(args[jkey], + Units(units[jkey]), + Units(units[ikey]), + inplace=True) + units[jkey] = units[ikey] + + # We need to do what sympify does but also with unit + # conversions. So we traverse the expression tree in post order + # and take actions based on the kind of operation being performed. 
+ expression = sympify(expression) for expr in postorder_traversal(expression): - ekey = str(expr) + + if expr.is_Atom: continue + ekey = str(expr) # expression key + if expr.is_Add: - # if there are scalars in the expression, these will not - # be in the units dictionary. Add them and give them an - # implicit unit of 1 - keys = [str(arg) for arg in expr.args] - for key in keys: - if not units.has_key(key): units[key] = "1" - - # if we are adding, all arguments must have the same unit. - key0 = keys[0] - for key in keys: - Unit(units[key]).convert(np.ones(1),Unit(units[key0])) - units[key] = units[key0] - units[ekey] = "%s" % (units[key0]) + # Addition will require that all args should be the same + # unit. As a convention, we will try to conform all units + # to the first variable's units. + key0 = None + for arg in expr.args: + key = str(arg) + if not args.has_key(key): continue + if key0 is None: + key0 = key + else: + # Conform these units to the units of the first arg + Units.conform(args[key], + Units(units[key]), + Units(units[key0]), + inplace=True) + units[key] = units[key0] + + args [ekey] = sympify(str(expr),locals=args) + units[ekey] = units[key0] elif expr.is_Pow: - # if raising to a power, just create the new unit - keys = [str(arg) for arg in expr.args] - units[ekey] = "(%s)%s" % (units[keys[0]],keys[1]) - + assert len(expr.args) == 2 # check on an assumption + power = float(expr.args[1]) + args [ekey] = args[str(expr.args[0])]**power + units[ekey] = Units(units[str(expr.args[0])]) + units[ekey] = units[ekey]**power + elif expr.is_Mul: - - # just create the new unit - keys = [str(arg) for arg in expr.args] - units[ekey] = " ".join(["(%s)" % units[key] for key in keys if units.has_key(key)]) - return sympify(str(expression),locals=args),units[ekey] - + unit = Units("1") + for arg in expr.args: + key = str(arg) + if units.has_key(key): unit *= Units(units[key]) + + args [ekey] = sympify(str(expr),locals=args) + units[ekey] = Units(unit).formatted() + + return args[ekey],units[ekey] + def ComputeIndexingArrays(lat2d,lon2d,lat,lon): """Blah. @@ -631,19 +623,8 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if lat_bnd_name is not None: lat_bnd = grp.variables[lat_bnd_name] [...] if lon_name is not None: lon = grp.variables[lon_name] [...] if lon_bnd_name is not None: lon_bnd = grp.variables[lon_bnd_name] [...] - if depth_name is not None: - dunit = None - if "units" in grp.variables[depth_name].ncattrs(): dunit = grp.variables[depth_name].units - depth = grp.variables[depth_name][...] - if depth_bnd_name is not None: - depth_bnd = grp.variables[depth_bnd_name][...] - if dunit is not None: - if not Unit(dunit).is_convertible(Unit("m")): - raise ValueError("Non-linear units [%s] of the layered dimension [%s] in %s" % (dunit,depth_name,filename)) - depth = Unit(dunit).convert(depth,Unit("m"),inplace=True) - if depth_bnd is not None: - depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) - + if depth_name is not None: depth = grp.variables[depth_name] [...] + if depth_bnd_name is not None: depth_bnd = grp.variables[depth_bnd_name][...] 
if data_name is not None: data = len(grp.dimensions[data_name]) # if we have data sites, there may be lat/lon data to come @@ -704,15 +685,16 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12) v = np.ma.masked_array(v,mask=mask,copy=False) + # handle units problems that cfunits doesn't if "units" in var.ncattrs(): - units = FixDumbUnits(var.units) + units = var.units.replace("unitless","1") else: units = "1" dset.close() return v,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data -def Score(var,normalizer): +def Score(var,normalizer,FC=0.999999): """Remaps a normalized variable to the interval [0,1]. Parameters @@ -728,7 +710,16 @@ def Score(var,normalizer): name = name.replace("rmse","rmse_score") name = name.replace("iav" ,"iav_score") np.seterr(over='ignore',under='ignore') - data = np.exp(-np.abs(var.data/normalizer.data)) + + data = None + if "bias" in var.name or "diff" in var.name: + deno = np.ma.copy(normalizer.data) + if (deno.size - deno.mask.sum()) > 1: deno -= deno.min()*FC + data = np.exp(-np.abs(var.data/deno)) + elif "rmse" in var.name: + data = np.exp(-var.data/normalizer.data) + elif "iav" in var.name: + data = np.exp(-np.abs(var.data/normalizer.data)) data[data<1e-16] = 0. np.seterr(over='raise',under='raise') return Variable(name = name, @@ -788,22 +779,11 @@ def ScoreSeasonalCycle(phase_shift): unit = "1", name = phase_shift.name.replace("phase_shift","phase_shift_score"), ndata = phase_shift.ndata, - lat = phase_shift.lat, lat_bnds = phase_shift.lat_bnds, - lon = phase_shift.lon, lon_bnds = phase_shift.lon_bnds, + lat = phase_shift.lat, + lon = phase_shift.lon, area = phase_shift.area) -def _composeGrids(v1,v2): - lat_bnds = np.unique(np.hstack([v1.lat_bnds.flatten(),v2.lat_bnds.flatten()])) - lon_bnds = np.unique(np.hstack([v1.lon_bnds.flatten(),v2.lon_bnds.flatten()])) - lat_bnds = lat_bnds[(lat_bnds>=- 90)*(lat_bnds<=+ 90)] - lon_bnds = lon_bnds[(lon_bnds>=-180)*(lon_bnds<=+180)] - lat_bnds = np.vstack([lat_bnds[:-1],lat_bnds[+1:]]).T - lon_bnds = np.vstack([lon_bnds[:-1],lon_bnds[+1:]]).T - lat = lat_bnds.mean(axis=1) - lon = lon_bnds.mean(axis=1) - return lat,lon,lat_bnds,lon_bnds - -def AnalysisMeanStateSites(ref,com,**keywords): +def AnalysisMeanState(ref,com,**keywords): """Perform a mean state analysis. This mean state analysis examines the model mean state in space @@ -841,8 +821,6 @@ def AnalysisMeanStateSites(ref,com,**keywords): the unit to use when displaying output on plots on the HTML page """ - - from Variable import Variable regions = keywords.get("regions" ,["global"]) dataset = keywords.get("dataset" ,None) benchmark_dataset = keywords.get("benchmark_dataset",None) @@ -853,64 +831,33 @@ def AnalysisMeanStateSites(ref,com,**keywords): skip_rmse = keywords.get("skip_rmse" ,False) skip_iav = keywords.get("skip_iav" ,False) skip_cycle = keywords.get("skip_cycle" ,False) + res = keywords.get("res" ,0.5) ILAMBregions = Regions() - spatial = False + spatial = ref.spatial normalizer = None # Only study the annual cycle if it makes sense if not ref.monthly: skip_cycle = True if ref.time.size < 12: skip_cycle = True - if skip_rmse : skip_iav = True + # In order to deal with differences in grids and representation of + # land, we will interpolate both datasets to a fixed + # resolution. While not perfect we interpolate to a grid + # resolution higher than models are typically run. 
By convention, + # I will use capital letters for the interpolated quantities. if spatial: - lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) - REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) - COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + junk,junk,lat,lon = GlobalLatLonGrid(res) + REF = ref.interpolate(lat=lat,lon=lon,itype='bilinear') + COM = com.interpolate(lat=lat,lon=lon,itype='bilinear') # We find the mean values over the time period on the original # grid/datasites of each dataset ref_timeint = ref.integrateInTime(mean=True) com_timeint = com.integrateInTime(mean=True) if spatial: - REF_timeint = REF.integrateInTime(mean=True) COM_timeint = COM.integrateInTime(mean=True) - - # Masks - ref_mask = REF_timeint.data.mask - com_mask = COM_timeint.data.mask - ref_and_com = (ref_mask == False) * (com_mask == False) - ref_not_com = (ref_mask == False) * (com_mask == True ) - com_not_ref = (ref_mask == True ) * (com_mask == False) - ref_and_COM = Variable(name = "ref_and_COM", unit = ref.unit, - data = np.ma.masked_array(COM_timeint.data,mask=(ref_and_com==False)), - lat = lat, lat_bnds = lat_bnds, - lon = lon, lon_bnds = lon_bnds, - area = COM_timeint.area) - COM_not_ref = Variable(name = "COM_not_ref", unit = ref.unit, - data = np.ma.masked_array(COM_timeint.data,mask=(com_not_ref==False)), - lat = lat, lat_bnds = lat_bnds, - lon = lon, lon_bnds = lon_bnds, - area = COM_timeint.area) - REF_and_com = Variable(name = "REF_and_com", unit = REF.unit, - data = np.ma.masked_array(REF_timeint.data,mask=(ref_and_com==False)), - lat = lat, lat_bnds = lat_bnds, - lon = lon, lon_bnds = lon_bnds, - area = REF_timeint.area) - REF_not_com = Variable(name = "REF_not_com", unit = REF.unit, - data = np.ma.masked_array(REF_timeint.data,mask=(ref_not_com==False)), - lat = lat, lat_bnds = lat_bnds, - lon = lon, lon_bnds = lon_bnds, - area = REF_timeint.area) - - # Apply intersection mask - REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) - COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) - REF_timeint.data.mask = (ref_and_com==False) - COM_timeint.data.mask = (ref_and_com==False) - else: - REF = ref COM = com REF_timeint = ref_timeint @@ -920,58 +867,12 @@ def AnalysisMeanStateSites(ref,com,**keywords): # Compute the bias, RMSE, and RMS maps using the interpolated # quantities bias = REF_timeint.bias(COM_timeint) - cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, - data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), - time = REF.time, time_bnds = REF.time_bnds, - lat = REF.lat , lat_bnds = REF.lat_bnds, - lon = REF.lon , lon_bnds = REF.lon_bnds, - area = REF.area, ndata = REF.ndata) - crms = cREF.rms () - bias_score_map = Score(bias,crms) - if spatial: - bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + bias_score_map = Score(bias,REF_timeint) if not skip_rmse: - cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit, - data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), - time = COM.time, time_bnds = COM.time_bnds, - lat = COM.lat , lat_bnds = COM.lat_bnds, - lon = COM.lon , lon_bnds = COM.lon_bnds, - area = COM.area, ndata = COM.ndata) - rmse = REF.rmse( COM) - crmse = cREF.rmse(cCOM) - rmse_score_map = Score(crmse,crms) - if not skip_iav: - ref_iav = Variable(name = "centralized %s" % 
ref.name, unit = ref.unit, - data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask), - time = ref.time, time_bnds = ref.time_bnds, - lat = ref.lat , lat_bnds = ref.lat_bnds, - lon = ref.lon , lon_bnds = ref.lon_bnds, - area = ref.area, ndata = ref.ndata).rms() - com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit, - data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask), - time = com.time, time_bnds = com.time_bnds, - lat = com.lat , lat_bnds = com.lat_bnds, - lon = com.lon , lon_bnds = com.lon_bnds, - area = com.area, ndata = com.ndata).rms() - REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit, - data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), - time = REF.time, time_bnds = REF.time_bnds, - lat = REF.lat , lat_bnds = REF.lat_bnds, - lon = REF.lon , lon_bnds = REF.lon_bnds, - area = REF.area, ndata = REF.ndata).rms() - COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit, - data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), - time = COM.time, time_bnds = COM.time_bnds, - lat = COM.lat , lat_bnds = COM.lat_bnds, - lon = COM.lon , lon_bnds = COM.lon_bnds, - area = COM.area, ndata = COM.ndata).rms() - iav_score_map = Score(Variable(name = "diff %s" % REF.name, unit = REF.unit, - data = (COM_iav.data-REF_iav.data), - lat = REF.lat , lat_bnds = REF.lat_bnds, - lon = REF.lon , lon_bnds = REF.lon_bnds, - area = REF.area, ndata = REF.ndata), - REF_iav) - + rmse = REF.rmse(COM) + rms = REF.rms () + rmse_score_map = Score(rmse,rms) + # The phase shift comes from the interpolated quantities if not skip_cycle: ref_cycle = REF.annualCycle() @@ -985,21 +886,15 @@ def AnalysisMeanStateSites(ref,com,**keywords): # Scalars ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {} com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {} - bias_val = {}; bias_score = {}; rmse_val = {}; rmse_score = {} - space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {} - ref_union_mean = {}; ref_comp_mean = {} - com_union_mean = {}; com_comp_mean = {} + bias_val = {}; bias_score = {}; rmse_val = {}; rmse_score = {} + space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {} for region in regions: if spatial: - ref_period_mean[region] = ref_timeint .integrateInSpace(region=region,mean=space_mean) - ref_union_mean [region] = REF_and_com .integrateInSpace(region=region,mean=space_mean) - com_union_mean [region] = ref_and_COM .integrateInSpace(region=region,mean=space_mean) - ref_comp_mean [region] = REF_not_com .integrateInSpace(region=region,mean=space_mean) - com_comp_mean [region] = COM_not_ref .integrateInSpace(region=region,mean=space_mean) + ref_period_mean[region] = REF_timeint .integrateInSpace(region=region,mean=space_mean) ref_spaceint [region] = REF .integrateInSpace(region=region,mean=True) - com_period_mean[region] = com_timeint .integrateInSpace(region=region,mean=space_mean) + com_period_mean[region] = COM_timeint .integrateInSpace(region=region,mean=space_mean) com_spaceint [region] = COM .integrateInSpace(region=region,mean=True) - bias_val [region] = bias .integrateInSpace(region=region,mean=True) + bias_val [region] = bias .integrateInSpace(region=region,mean=space_mean) bias_score [region] = bias_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) if not skip_cycle: ref_mean_cycle[region] 
= ref_cycle .integrateInSpace(region=region,mean=True) @@ -1009,12 +904,10 @@ def AnalysisMeanStateSites(ref,com,**keywords): com_dtcycle [region] = deepcopy(com_mean_cycle[region]) com_dtcycle [region].data -= com_mean_cycle[region].data.mean() shift [region] = shift_map .integrateInSpace(region=region,mean=True,intabs=True) - shift_score [region] = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + shift_score [region] = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) if not skip_rmse: - rmse_val [region] = rmse .integrateInSpace(region=region,mean=True) + rmse_val [region] = rmse .integrateInSpace(region=region,mean=space_mean) rmse_score [region] = rmse_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) - if not skip_iav: - iav_score [region] = iav_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) space_std[region],space_cor[region],sd_score[region] = REF_timeint.spatialDistribution(COM_timeint,region=region) else: ref_period_mean[region] = ref_timeint .siteStats(region=region) @@ -1035,20 +928,16 @@ def AnalysisMeanStateSites(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .siteStats(region=region) rmse_score [region] = rmse_score_map .siteStats(region=region,weight=normalizer) - if not skip_iav: - iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer) - ref_period_mean[region].name = "Period Mean (original grids) %s" % (region) + ref_period_mean[region].name = "Period Mean %s" % (region) ref_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) - com_period_mean[region].name = "Period Mean (original grids) %s" % (region) + com_period_mean[region].name = "Period Mean %s" % (region) com_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) bias_val [region].name = "Bias %s" % (region) bias_score [region].name = "Bias Score %s" % (region) if not skip_rmse: rmse_val [region].name = "RMSE %s" % (region) rmse_score [region].name = "RMSE Score %s" % (region) - if not skip_iav: - iav_score [region].name = "Interannual Variability Score %s" % (region) if not skip_cycle: ref_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) ref_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) @@ -1057,11 +946,7 @@ def AnalysisMeanStateSites(ref,com,**keywords): shift [region].name = "Phase Shift %s" % (region) shift_score [region].name = "Seasonal Cycle Score %s" % (region) if spatial: - ref_union_mean[region].name = "Benchmark Period Mean (intersection) %s" % (region) - com_union_mean[region].name = "Model Period Mean (intersection) %s" % (region) - ref_comp_mean [region].name = "Benchmark Period Mean (complement) %s" % (region) - com_comp_mean [region].name = "Model Period Mean (complement) %s" % (region) - sd_score [region].name = "Spatial Distribution Score %s" % (region) + sd_score [region].name = "Spatial Distribution Score %s" % (region) # Unit conversions def _convert(var,unit): @@ -1071,13 +956,12 @@ def _convert(var,unit): var.convert(unit) if table_unit is not None: - for var in [ref_period_mean,com_period_mean,ref_union_mean,com_union_mean,ref_comp_mean,com_comp_mean]: + for var in [ref_period_mean,com_period_mean,bias_val,rmse_val]: _convert(var,table_unit) if plot_unit is not None: - plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val] - if not skip_rmse: plot_vars += [rmse,rmse_val] - if not skip_cycle: plot_vars += 
[com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] - if not skip_iav: plot_vars += [com_iav] + plot_vars = [com_timeint,COM_timeint,ref_timeint,REF_timeint,bias,com_spaceint,ref_spaceint] + if not skip_rmse: plot_vars.append(rmse) + if not skip_cycle: plot_vars +=[com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] for var in plot_vars: _convert(var,plot_unit) # Rename and optionally dump out information to netCDF4 files @@ -1086,10 +970,6 @@ def _convert(var,unit): bias_score_map .name = "biasscore_map_of_%s" % ref.name out_vars = [com_period_mean, - ref_union_mean, - com_union_mean, - ref_comp_mean, - com_comp_mean, com_timeint, com_mean_cycle, com_dtcycle, @@ -1100,6 +980,9 @@ def _convert(var,unit): shift, shift_score] if com_spaceint[com_spaceint.keys()[0]].data.size > 1: out_vars.append(com_spaceint) + if spatial: + COM_timeint.name = "timeintremap_of_%s" % ref.name + out_vars.append(COM_timeint) if not skip_cycle: com_maxt_map .name = "phase_map_of_%s" % ref.name shift_map .name = "shift_map_of_%s" % ref.name @@ -1109,17 +992,13 @@ def _convert(var,unit): out_vars.append(shift_score_map) if not skip_rmse: rmse .name = "rmse_map_of_%s" % ref.name + rms .name = "rms_map_of_%s" % ref.name rmse_score_map.name = "rmsescore_map_of_%s" % ref.name out_vars.append(rmse) + out_vars.append(rms ) out_vars.append(rmse_score_map) out_vars.append(rmse_val) out_vars.append(rmse_score) - if not skip_iav: - com_iav.name = "iav_map_of_%s" % ref.name - iav_score_map.name = "iavscore_map_of_%s" % ref.name - out_vars.append(com_iav) - out_vars.append(iav_score_map) - out_vars.append(iav_score) if dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1138,9 +1017,9 @@ def _convert(var,unit): if not skip_cycle: ref_maxt_map.name = "phase_map_of_%s" % ref.name out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle] - if not skip_iav: - ref_iav.name = "iav_map_of_%s" % ref.name - out_vars.append(ref_iav) + if spatial: + REF_timeint.name = "timeintremap_of_%s" % ref.name + out_vars.append(REF_timeint) if benchmark_dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1149,303 +1028,124 @@ def _convert(var,unit): var.toNetCDF4(benchmark_dataset,group="MeanState") return - - -def AnalysisMeanStateSpace(ref,com,**keywords): - """Perform a mean state analysis. - This mean state analysis examines the model mean state in space - and time. We compute the mean variable value over the time period - at each spatial cell or data site as appropriate, as well as the - bias and RMSE relative to the observational variable. We will - output maps of the period mean values and bias. For each spatial - cell or data site we also estimate the phase of the variable by - finding the mean time of year when the maximum occurs and the - phase shift by computing the difference in phase with respect to - the observational variable. In the spatial dimension, we compute a - spatial mean for each of the desired regions and an average annual - cycle. + +def AnalysisRelationship(dep_var,ind_var,dataset,rname,**keywords): + """Perform a relationship analysis. + + Expand to provide details of what exactly is done. 
Parameters ---------- - obs : ILAMB.Variable.Variable - the observational (reference) variable - mod : ILAMB.Variable.Variable - the model (comparison) variable - regions : list of str, optional - the regions overwhich to apply the analysis - dataset : netCDF4.Dataset, optional + dep_var : ILAMB.Variable.Variable + the dependent variable + ind_var : ILAMB.Variable.Variable + the independent variable + dataset : netCDF4.Dataset a open dataset in write mode for caching the results of the analysis which pertain to the model - benchmark_dataset : netCDF4.Dataset, optional - a open dataset in write mode for caching the results of the - analysis which pertain to the observations - space_mean : bool, optional - disable to compute sums of the variable over space instead of - mean values - table_unit : str, optional - the unit to use when displaying output in tables on the HTML page - plots_unit : str, optional - the unit to use when displaying output on plots on the HTML page - - """ - from Variable import Variable - regions = keywords.get("regions" ,["global"]) - dataset = keywords.get("dataset" ,None) - benchmark_dataset = keywords.get("benchmark_dataset",None) - space_mean = keywords.get("space_mean" ,True) - table_unit = keywords.get("table_unit" ,None) - plot_unit = keywords.get("plot_unit" ,None) - mass_weighting = keywords.get("mass_weighting" ,False) - skip_rmse = keywords.get("skip_rmse" ,False) - skip_iav = keywords.get("skip_iav" ,False) - skip_cycle = keywords.get("skip_cycle" ,False) - ILAMBregions = Regions() - spatial = ref.spatial - - # Convert str types to booleans - if type(skip_rmse) == type(""): - skip_rmse = (skip_rmse.lower() == "true") - if type(skip_iav ) == type(""): - skip_iav = (skip_iav .lower() == "true") - if type(skip_cycle) == type(""): - skip_cycle = (skip_cycle.lower() == "true") + rname : str + the name of the relationship under study + regions : list of str, optional + a list of units over which to apply the analysis + dep_plot_unit,ind_plot_unit : str, optional + the name of the unit to use in the plots found on the HTML output + + """ + def _extractMaxTemporalOverlap(v1,v2): # should move? + t0 = max(v1.time.min(),v2.time.min()) + tf = min(v1.time.max(),v2.time.max()) + for v in [v1,v2]: + begin = np.argmin(np.abs(v.time-t0)) + end = np.argmin(np.abs(v.time-tf))+1 + v.time = v.time[begin:end] + v.data = v.data[begin:end,...] 
+ mask = v1.data.mask + v2.data.mask + v1 = v1.data[mask==0].flatten() + v2 = v2.data[mask==0].flatten() + return v1,v2 + + # grab regions + regions = keywords.get("regions",["global"]) - # Check if we need to skip parts of the analysis - if not ref.monthly : skip_cycle = True - if ref.time.size < 12: skip_cycle = True - if ref.time.size == 1: skip_rmse = True - if skip_rmse : skip_iav = True - name = ref.name - - # Interpolate both reference and comparison to a grid composed of - # their cell breaks - ref.convert(plot_unit) - com.convert(plot_unit) - lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) - REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) - COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) - unit = REF.unit - area = REF.area - ndata = REF.ndata - - # Find the mean values over the time period - ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) - com_timeint = com.integrateInTime(mean=True).convert(plot_unit) - REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) - COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) - normalizer = REF_timeint.data if mass_weighting else None - - # Report period mean values over all possible representations of - # land - ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) - ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) - com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) - if benchmark_dataset is not None: - - ref_timeint.name = "timeint_of_%s" % name - ref_timeint.toNetCDF4(benchmark_dataset,group="MeanState") - for region in regions: - - # reference period mean on original grid - ref_period_mean = ref_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) - ref_period_mean.name = "Period Mean (original grids) %s" % region - ref_period_mean.toNetCDF4(benchmark_dataset,group="MeanState") - - if dataset is not None: + # convert to plot units + dep_plot_unit = keywords.get("dep_plot_unit",dep_var.unit) + ind_plot_unit = keywords.get("ind_plot_unit",ind_var.unit) + if dep_plot_unit is not None: dep_var.convert(dep_plot_unit) + if ind_plot_unit is not None: ind_var.convert(ind_plot_unit) + + # if the variables are temporal, we need to get period means + if dep_var.temporal: dep_var = dep_var.integrateInTime(mean=True) + if ind_var.temporal: ind_var = ind_var.integrateInTime(mean=True) + mask = dep_var.data.mask + ind_var.data.mask + + # analysis over regions + for region in regions: - com_timeint.name = "timeint_of_%s" % name - com_timeint.toNetCDF4(dataset,group="MeanState") - for region in regions: - - # reference period mean on intersection of land - ref_union_mean = Variable(name = "REF_and_com", unit = REF_timeint.unit, - data = np.ma.masked_array(REF_timeint.data,mask=(ref_and_com==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - ref_union_mean.name = "Benchmark Period Mean (intersection) %s" % region - ref_union_mean.toNetCDF4(dataset,group="MeanState") - - # reference period mean on complement of land - ref_comp_mean = Variable(name = "REF_not_com", unit = REF_timeint.unit, - data = np.ma.masked_array(REF_timeint.data,mask=(ref_not_com==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - 
ref_comp_mean.name = "Benchmark Period Mean (complement) %s" % region - ref_comp_mean.toNetCDF4(dataset,group="MeanState") - - # comparison period mean on original grid - com_period_mean = com_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) - com_period_mean.name = "Period Mean (original grids) %s" % region - com_period_mean.toNetCDF4(dataset,group="MeanState") - - # comparison period mean on intersection of land - com_union_mean = Variable(name = "ref_and_COM", unit = COM_timeint.unit, - data = np.ma.masked_array(COM_timeint.data,mask=(ref_and_com==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - com_union_mean.name = "Model Period Mean (intersection) %s" % region - com_union_mean.toNetCDF4(dataset,group="MeanState") - - # comparison period mean on complement of land - com_comp_mean = Variable(name = "COM_not_ref", unit = COM_timeint.unit, - data = np.ma.masked_array(COM_timeint.data,mask=(com_not_ref==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - com_comp_mean.name = "Model Period Mean (complement) %s" % region - com_comp_mean.toNetCDF4(dataset,group="MeanState") - - # Now that we are done reporting on the intersection / complement, - # set all masks to the intersection - REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) - COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) - REF_timeint.data.mask = (ref_and_com==False) - COM_timeint.data.mask = (ref_and_com==False) - if mass_weighting: normalizer.mask = (ref_and_com==False) - - # Spatial Distribution: scalars and scores - if dataset is not None: - for region in regions: - space_std,space_cor,sd_score = REF_timeint.spatialDistribution(COM_timeint,region=region) - sd_score.name = "Spatial Distribution Score %s" % region - sd_score.toNetCDF4(dataset,group="MeanState", - attributes={"std":space_std.data, - "R" :space_cor.data}) - - # Cycle: maps, scalars, and scores - if not skip_cycle: - ref_cycle = REF.annualCycle() - ref_maxt_map = ref_cycle.timeOfExtrema(etype="max") - ref_maxt_map.name = "phase_map_of_%s" % name - com_cycle = COM.annualCycle() - com_maxt_map = com_cycle.timeOfExtrema(etype="max") - com_maxt_map.name = "phase_map_of_%s" % name - shift_map = ref_maxt_map.phaseShift(com_maxt_map) - shift_map.name = "shift_map_of_%s" % name - shift_score_map = ScoreSeasonalCycle(shift_map) - shift_score_map.name = "shiftscore_map_of_%s" % name - shift_map.data /= 30.; shift_map.unit = "months" - if benchmark_dataset is not None: - ref_maxt_map.toNetCDF4(benchmark_dataset,group="MeanState") - for region in regions: - ref_mean_cycle = ref_cycle.integrateInSpace(region=region,mean=True) - ref_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) - ref_mean_cycle.toNetCDF4(benchmark_dataset,group="MeanState") - ref_dtcycle = deepcopy(ref_mean_cycle) - ref_dtcycle.data -= ref_mean_cycle.data.mean() - ref_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) - ref_dtcycle.toNetCDF4(benchmark_dataset,group="MeanState") - if dataset is not None: - com_maxt_map.toNetCDF4(dataset,group="MeanState") - shift_map .toNetCDF4(dataset,group="MeanState") - shift_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - com_mean_cycle = 
com_cycle.integrateInSpace(region=region,mean=True) - com_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) - com_mean_cycle.toNetCDF4(dataset,group="MeanState") - com_dtcycle = deepcopy(com_mean_cycle) - com_dtcycle.data -= com_mean_cycle.data.mean() - com_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) - com_dtcycle.toNetCDF4(dataset,group="MeanState") - shift = shift_map.integrateInSpace(region=region,mean=True,intabs=True) - shift_score = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - shift .name = "Phase Shift %s" % region - shift .toNetCDF4(dataset,group="MeanState") - shift_score.name = "Seasonal Cycle Score %s" % region - shift_score.toNetCDF4(dataset,group="MeanState") - - del ref_cycle,com_cycle,shift_map,shift_score_map + lats,lons = ILAMBregions[region] + rmask = (np.outer((dep_var.lat>lats[0])*(dep_var.latlons[0])*(dep_var.lon 1 else REF_timeint) - bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask - if dataset is not None: - bias.name = "bias_map_of_%s" % name - bias.toNetCDF4(dataset,group="MeanState") - bias_score_map.name = "biasscore_map_of_%s" % name - bias_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) - bias_val.name = "Bias %s" % region - bias_val.toNetCDF4(dataset,group="MeanState") - bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - bias_score.name = "Bias Score %s" % region - bias_score.toNetCDF4(dataset,group="MeanState") - del bias,bias_score_map - - # Spatial mean: plots - if REF.time.size > 1: - if benchmark_dataset is not None: - for region in regions: - ref_spaceint = REF.integrateInSpace(region=region,mean=True) - ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) - ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") - if dataset is not None: - for region in regions: - com_spaceint = COM.integrateInSpace(region=region,mean=True) - com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) - com_spaceint.toNetCDF4(dataset,group="MeanState") - - # RMSE: maps, scalars, and scores - if not skip_rmse: - rmse = REF.rmse(COM).convert(plot_unit) - del REF - cCOM = Variable(name = "centralized %s" % name, unit = unit, - data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), - time = COM.time, time_bnds = COM.time_bnds, - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = COM.area, ndata = COM.ndata).convert(plot_unit) - del COM - crmse = cREF.rmse(cCOM).convert(plot_unit) - del cREF - if skip_iav: del cCOM - rmse_score_map = Score(crmse,REF_iav) - if dataset is not None: - rmse.name = "rmse_map_of_%s" % name - rmse.toNetCDF4(dataset,group="MeanState") - rmse_score_map.name = "rmsescore_map_of_%s" % name - rmse_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) - rmse_val.name = "RMSE %s" % region - rmse_val.toNetCDF4(dataset,group="MeanState") - rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - rmse_score.name = "RMSE Score %s" % region - rmse_score.toNetCDF4(dataset,group="MeanState") - del rmse,crmse,rmse_score_map - - # IAV: maps, scalars, scores - if not skip_iav: - COM_iav = cCOM.rms() - del cCOM - iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit, - data = (COM_iav.data-REF_iav.data), - lat = lat, 
lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = area, ndata = ndata), - REF_iav) - if benchmark_dataset is not None: - REF_iav.name = "iav_map_of_%s" % name - REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") - if dataset is not None: - COM_iav.name = "iav_map_of_%s" % name - COM_iav.toNetCDF4(dataset,group="MeanState") - iav_score_map.name = "iavscore_map_of_%s" % name - iav_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - iav_score.name = "Interannual Variability Score %s" % region - iav_score.toNetCDF4(dataset,group="MeanState") - del COM_iav,iav_score_map - del REF_iav - - return + # Write relationship to the dataset + grp.createDimension("ndata",size=xmean.size) + X = grp.createVariable("ind_mean","double",("ndata")) + X.setncattr("unit",ind_plot_unit) + M = grp.createVariable("dep_mean","double",("ndata")) + M.setncattr("unit",dep_plot_unit) + S = grp.createVariable("dep_std" ,"double",("ndata")) + X[...] = xmean + M[...] = ymean + S[...] = ystd def ClipTime(v,t0,tf): """Remove time from a variable based on input bounds. @@ -1531,10 +1231,10 @@ def MakeComparable(ref,com,**keywords): # If the reference is spatial, the comparison must be if ref.spatial and not com.spatial: - ref = ref.extractDatasites(com.lat,com.lon) - msg = "%s The reference dataset is spatial but the comparison is site-based. " % logstring - msg += "Extracted %s sites from the reference to match the comparison." % ref.ndata - logger.info(msg) + msg = "%s Datasets are not uniformly spatial: " % logstring + msg += "reference = %s, comparison = %s" % (ref.spatial,com.spatial) + logger.debug(msg) + raise VarsNotComparable() # If the reference is layered, the comparison must be if ref.layered and not com.layered: @@ -1614,7 +1314,7 @@ def MakeComparable(ref,com,**keywords): # comparison, coarsen the comparison if np.log10(ref.dt/com.dt) > 0.5: com = com.coarsenInTime(ref.time_bnds,window=window) - + # Time bounds of the reference dataset t0 = ref.time_bnds[ 0,0] tf = ref.time_bnds[-1,1] @@ -1672,6 +1372,15 @@ def MakeComparable(ref,com,**keywords): logger.debug(msg) raise VarsNotComparable() + # Apply the reference mask to the comparison dataset and + # optionally vice-versa + if not ref.layered: + mask = ref.interpolate(time=com.time,lat=com.lat,lon=com.lon) + com.data.mask += mask.data.mask + if mask_ref: + mask = com.interpolate(time=ref.time,lat=ref.lat,lon=ref.lon) + ref.data.mask += mask.data.mask + # Convert the comparison to the units of the reference com = com.convert(ref.unit) diff --git a/test/scores_test.csv.gold b/test/scores_test.csv.gold index 6fccfb93..6eb4f400 100644 --- a/test/scores_test.csv.gold +++ b/test/scores_test.csv.gold @@ -1,9 +1,9 @@ Variables,CLM50r243CRUNCEP,CLM50r243GSWP3 -Biomass,0.5957104653413856,0.6783045750117078 -Gross Primary Productivity,0.6217211297637607,0.6126273585798891 -Global Net Ecosystem Carbon Balance,0.7054000637266042,0.8636690794621101 -Net Ecosystem Exchange,0.3941918077804778,0.38120476926634617 -Terrestrial Water Storage Anomaly,0.7000653021257858,0.7269702240175762 -Albedo,0.5434663466148166,0.544587485316599 -Surface Air Temperature,0.9256731031865132,0.9314748385926337 -Precipitation,0.7555153501937276,0.7679655805094326 +Biomass,0.612514900875,0.699490061835 +Gross Primary Productivity,0.758255946996,0.744520263877 +Global Net Ecosystem Carbon Balance,0.705400063727,0.863669079462 +Net Ecosystem 
Exchange,0.526032818281,0.505743500029 +Terrestrial Water Storage Anomaly,0.484324846664,0.470686677805 +Albedo,0.790786018141,0.793648623556 +Surface Air Temperature,0.983463847057,0.986084928759 +Precipitation,0.82458204587,0.849301357195 From 0daad6cd31e656dcee898c0a5fd36067998ab4ee Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 3 Jul 2018 14:45:26 -0600 Subject: [PATCH 06/22] update for NPL --- Machines/geyser_modules | 3 - Machines/machine_postprocess.xml | 19 +---- Templates/batch_cheyenne.tmpl | 2 + Templates/batch_geyser.tmpl | 2 + Templates/postprocess.tmpl | 7 +- Tools/ration.log | 95 +++++++-------------- Tools/ration_script_geyser | 34 ++++++++ Tools/{ration_example.py => ration_test.py} | 0 8 files changed, 76 insertions(+), 86 deletions(-) create mode 100755 Tools/ration_script_geyser rename Tools/{ration_example.py => ration_test.py} (100%) diff --git a/Machines/geyser_modules b/Machines/geyser_modules index b32ab6fa..ddbb2b80 100755 --- a/Machines/geyser_modules +++ b/Machines/geyser_modules @@ -1,5 +1,4 @@ #!/bin/bash -l -. /etc/bash.bashrc echo "Python boot-strap modules for machine geyser" @@ -11,8 +10,6 @@ module load impi module load netcdf/4.6.1 module load nco/4.7.4 module load ncl/6.4.0 -# still need pynio and pyngl installed in the geyser virtualenv -# Brian V. will work to install # clone the geyser virtualenv first with helper script ncar_pylib # use "ncar_pylib --help" to see all options diff --git a/Machines/machine_postprocess.xml b/Machines/machine_postprocess.xml index 561eee3c..9d669be5 100644 --- a/Machines/machine_postprocess.xml +++ b/Machines/machine_postprocess.xml @@ -16,26 +16,15 @@ module purge - module load python/2.7.7 - module load intel/16.0.3 + module load intel/17.0.1 module load ncarenv - module load ncarbinlibs module load ncarcompilers - module load slurm/17 - module load impi/5.1.1.109 - module load numpy/1.11.0 - module load scipy/0.18.1 - module load mpi4py/2.0.0-impi - module load pynio/1.4.1 - module load pyside/1.1.2 - module load matplotlib/1.5.1 - module load netcdf/4.3.0 - module load nco/4.4.4 - module load netcdf4python/1.2.4 + module load impi + module load netcdf/4.6.1 + module load nco/4.7.4 module load ncl/6.4.0 - module load pyngl/1.4.0 diff --git a/Templates/batch_cheyenne.tmpl b/Templates/batch_cheyenne.tmpl index 4d1d8041..6168cd6e 100644 --- a/Templates/batch_cheyenne.tmpl +++ b/Templates/batch_cheyenne.tmpl @@ -1,3 +1,5 @@ +#! /usr/bin/env/bash + #PBS -N {{ processName }} #PBS -q {{ queue }} #PBS -l select={{ nodes }}:ncpus={{ ppn }}:mpiprocs={{ ppn }} diff --git a/Templates/batch_geyser.tmpl b/Templates/batch_geyser.tmpl index 4f0b7287..feb0d5fe 100644 --- a/Templates/batch_geyser.tmpl +++ b/Templates/batch_geyser.tmpl @@ -1,3 +1,5 @@ +#! /bin/bash -1 + #SBATCH -n {{ pes }} #SBATCH -N {{ nodes }} #SBATCH --ntasks-per-node={{ ppn }} diff --git a/Templates/postprocess.tmpl b/Templates/postprocess.tmpl index f7b5caa1..3f516a62 100644 --- a/Templates/postprocess.tmpl +++ b/Templates/postprocess.tmpl @@ -1,4 +1,5 @@ -#! /usr/bin/env bash +{{ batchdirectives }} + ########## ## ## See https://github.com/NCAR/CESM_postprocessing/wiki for details @@ -6,14 +7,12 @@ ## ########## -{{ batchdirectives }} - if [ ! 
-e {{ virtualEnvDir }} ]; then echo "*************************************************************************************" echo "CESM {{ processName }} exiting due to non-existant python virtual environment in" echo " {{ virtualEnvDir }}" echo "You must first run:" - echo "$SRCROOT/postprocessing/create_python_env.sh -machine [machine]" + echo "$POSTPROCESS_PATH/create_python_env.sh -machine [machine]" echo "*************************************************************************************" exit fi diff --git a/Tools/ration.log b/Tools/ration.log index 247a75d7..06a39061 100644 --- a/Tools/ration.log +++ b/Tools/ration.log @@ -1,64 +1,31 @@ -Execute poe command line: poe ./ration_example.py -0:0/4: Sent 0 -1:1/4: Recvd 0 -1:1/4: Recvd 1 -0:0/4: Sent 1 -0:0/4: Sent 2 -1:1/4: Recvd 2 -0:0/4: Sent 3 -1:1/4: Recvd 3 -0:0/4: Sent 4 -1:1/4: Recvd 4 -0:0/4: Sent 5 -1:1/4: Recvd 5 -0:0/4: Sent 6 -1:1/4: Recvd 6 -0:0/4: Sent 7 -1:1/4: Recvd 7 -0:0/4: Sent 8 -1:1/4: Recvd 8 -0:0/4: Sent 9 -1:1/4: Recvd 9 -0:0/4: Sent None -1:1/4: Recvd None -1:1/4: Out of loop -0:0/4: Sent None -0:0/4: Sent None -0:0/4: Out of loop -2:2/4: Recvd None -2:2/4: Out of loop -3:3/4: Recvd None -3:3/4: Out of loop -0:Done. -Execute poe command line: poe ./ration_example.py -0:0/4: Sent 0 -1:1/4: Recvd 0 -1:1/4: Recvd 1 -0:0/4: Sent 1 -0:0/4: Sent 2 -1:1/4: Recvd 2 -0:0/4: Sent 3 -1:1/4: Recvd 3 -0:0/4: Sent 4 -1:1/4: Recvd 4 -0:0/4: Sent 5 -0:0/4: Sent 6 -1:1/4: Recvd 6 -0:0/4: Sent 7 -2:2/4: Recvd 5 -0:0/4: Sent 8 -1:1/4: Recvd 8 -3:3/4: Recvd 7 -0:0/4: Sent 9 -2:2/4: Recvd 9 -0:0/4: Sent None -1:1/4: Recvd None -1:1/4: Out of loop -0:0/4: Sent None -3:3/4: Recvd None -3:3/4: Out of loop -0:0/4: Sent None -0:0/4: Out of loop -2:2/4: Recvd None -2:2/4: Out of loop -0:Done. +2/4: Recvd 0 +2/4: Recvd 3 +2/4: Recvd 6 +2/4: Recvd 9 +2/4: Recvd None +2/4: Out of loop +0/4: Sent 0 +0/4: Sent 1 +0/4: Sent 2 +0/4: Sent 3 +0/4: Sent 4 +0/4: Sent 5 +0/4: Sent 6 +0/4: Sent 7 +0/4: Sent 8 +0/4: Sent 9 +0/4: Sent None +0/4: Sent None +0/4: Sent None +0/4: Out of loop +Done. +1/4: Recvd 1 +1/4: Recvd 4 +1/4: Recvd 7 +1/4: Recvd None +1/4: Out of loop +3/4: Recvd 2 +3/4: Recvd 5 +3/4: Recvd 8 +3/4: Recvd None +3/4: Out of loop diff --git a/Tools/ration_script_geyser b/Tools/ration_script_geyser new file mode 100755 index 00000000..95da5860 --- /dev/null +++ b/Tools/ration_script_geyser @@ -0,0 +1,34 @@ +#!/bin/bash -l + +## test the mpi4py and ASAPPyTools utility on geyser with ncar_pylib virtualenv + +#SBATCH -t 00:05:00 +#SBATCH -n 4 +#SBATCH -N 2 +#SBATCH --ntasks-per-node=2 +#SBATCH -p dav +#SBATCH -J ration_test +#SBATCH -A P93300606 +#SBATCH -C geyser +#SBATCH --mem 1G +#SBATCH -e ration_test.err.%J +#SBATCH -o ration_test.out.%J + +export MP_LABELIO=yes + +module load python/2.7.14 + +. /glade2/work/aliceb/sandboxes/dev/postprocessing_geyser/cesm-env2/bin/activate + +srun ./ration_test.py >> ./ration.log + +status=$? 
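Note: the ration.log output above was produced by ration_test.py, in which rank 0 hands work items to the other MPI ranks and then sends a None sentinel to shut each worker down. As a rough illustration of that pattern (a hypothetical round-robin stand-in, not the actual ASAPPyTools rationer, so the item-to-rank assignment and ordering differ from the real log), a minimal mpi4py sketch could look like:

    # ration_sketch.py : hypothetical illustration of the master/worker pattern
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    if rank == 0:
        workers = range(1, size)
        for item in range(10):
            dest = workers[item % len(workers)]
            comm.send(item, dest=dest)              # hand out the next work item
            print "%d/%d: Sent %d" % (rank, size, item)
        for dest in workers:
            comm.send(None, dest=dest)              # sentinel: no more work
            print "%d/%d: Sent None" % (rank, size)
        print "Done."
    else:
        while True:
            item = comm.recv(source=0)
            print "%d/%d: Recvd %s" % (rank, size, item)
            if item is None:
                break
        print "%d/%d: Out of loop" % (rank, size)

Launched with four tasks under srun, as ration_script_geyser does, the interleaved per-rank prints resemble the log above; the exact interleaving depends on the scheduler.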
+echo $status + +deactivate + +echo $status + + + + diff --git a/Tools/ration_example.py b/Tools/ration_test.py similarity index 100% rename from Tools/ration_example.py rename to Tools/ration_test.py From 1a774b8df1328594ec9854555ceb1c2e27588dc7 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 3 Jul 2018 14:45:40 -0600 Subject: [PATCH 07/22] Squashed 'ilamb/ilamb/' changes from f46c647..5f35ec3 5f35ec3 cf_units requires more recent numpy but does not have it in their requirements 4bf570e 2.3 release f1ce475 doc cleanup from cfunits move 3c88a3a move from cfunits to cf_units 515cfb1 added initial logic will excludes outliers when computing functional relationships eed6fb0 fixed a bug which would cause a crash if non-string global attributes were found in the netCDF files. Thanks to Declan Valters for the report 9197e39 added plot limits to the summary images 9af8e66 added a derived expression for twsa and required code support 919c5d6 updated the gold scores, fixed an error in handling the bias score in biomass, large differences possible 69d7d85 force a mask 0ab4b6c fixed some unit issues 70d6042 moved where some scalars are saved in the analysis 3348b64 neglected to put writes where we can be sure datasets are defined f59b823 restored site analysis, needs revamped 2c3136a finished memory refactor on the spatial analysis 78ce3e5 removed a duplicated function definition 0fb6a7f first pass at memory optimzation 40fa2e7 added a pass functionality to unit conversions 3968d67 added a nbytes routine to determine the size of Variables b152fd9 rework of sympifywithargs to reduce memory usage a9cefa0 changes to scores based on added iav 922d9ac removed numpy restriction cb7f921 removed /= because it is causing problems with numpy 1.14 c454c2d restored iav to the analysis b82ffa0 Merge branch 'ncollier/grace-change' 3673d67 changes in gold score to reflect methodology change 2420b0c change of directory name to reflect a last name and not a acronym 0a7636c added an attempt to purge C for carbon in units 23fdf42 added site plots to runoff and fixed twsa/runoff plots to show by basin in the all models tab acbdaca small fix to location of a dataset 49d8eab added a permafrost extent comparison 67c86b4 added a snow water equivalent dataset d110066 Merge branch 'master' of bitbucket.org:ncollier/ilamb 75a417a numpy 1.14.0 was found to give an error when using the /= operator on masked arrays. For now limiting the upper limit on the version f6c7692 Relaxed a requirement that the reference and comparison datasets must be uniformly spatial. This allows us to use a spatial observational dataset to compare against models run at sites. 
d81f144 added a reference explaining why we have changed the methodology 7a2e751 changes to grace methodology based on Swenson's suggestions f688075 missed a barrier which can be problematic when trying to render composite plots and compute relationships 3fae4d8 wrong import d58c5e4 changes in gold scores due to method change 4997958 rework and abstraction of relationship analysis, fixed warnings in plot generation e5d50a4 added more horizontal width per variable label length d19c523 added a --rel_only option to ilamb-run that will render summary plots without absolute scores 2968f2e safety if no relationships exist e4857ed fixed relationship plot ce53afd first pass at a tool for extracting scalar scores from files in csv format 4aa220d shifts in scoring of bias and rmse 3648557 conditional if no relationships exist git-subtree-dir: ilamb/ilamb git-subtree-split: 5f35ec3e07f7ea8441429ea0c3e864d1aa8340e1 --- README.rst | 41 ++- bin/ilamb-run | 9 +- bin/ilamb-table | 65 ++++ demo/ilamb.cfg | 26 ++ doc/install.rst | 4 +- setup.py | 10 +- src/ILAMB/ConfPermafrost.py | 223 +++++++++++++ src/ILAMB/ConfRunoff.py | 44 ++- src/ILAMB/ConfTWSA.py | 243 ++++++++------ src/ILAMB/Confrontation.py | 491 +++++++++++++++------------- src/ILAMB/ModelResult.py | 1 - src/ILAMB/Post.py | 28 +- src/ILAMB/Scoreboard.py | 130 ++------ src/ILAMB/Variable.py | 101 +++--- src/ILAMB/__init__.py | 6 +- src/ILAMB/constants.py | 22 ++ src/ILAMB/ilamblib.py | 633 ++++++++++++++++++++++++------------ test/scores_test.csv.gold | 16 +- 18 files changed, 1394 insertions(+), 699 deletions(-) create mode 100644 bin/ilamb-table create mode 100644 src/ILAMB/ConfPermafrost.py diff --git a/README.rst b/README.rst index 86120b54..ebbe75ee 100644 --- a/README.rst +++ b/README.rst @@ -31,27 +31,38 @@ Useful Information * `CLM `_ - land comparison against 3 CLM versions and 2 forcings * `CMIP5 `_ - land comparison against a collection of CMIP5 models * `IOMB `_ - ocean comparison against a few ocean models - + +* Paper `preprint `_ which + details the design and methodology employed in the ILAMB package * If you find the package or the ouput helpful in your research or development efforts, we kindly ask you to cite the following reference (DOI:10.18139/ILAMB.v002.00/1251621). -ILAMB 2.2 Release +ILAMB 2.3 Release ----------------- -We are pleased to announce version 2.2 of the ILAMB python package. Among many small bugfixes and enhancements, the new version contains the following new features: - -* A new installed command ``ilamb-fetch`` has been included which can be run to automatically download the observational datasets. Running this command after the data has been downloaded will check your collection for updates and consistency. -* A new installed command ``ilamb-doctor`` has been included which can be run with options similar to ``ilamb-run`` to help identify which values a particular configure file needs in order to run. -* ILAMB will now check the spatial extents of all the models present in the current run and clip away to the largest shared extent. This allows ILAMB to be applied to regional models. -* User-defined regions can now be added at runtime either by specifying latitude/longitude bounds, or a mask in a netCDF4 file. For specifics, consult the regions `tutorial `_. -* Added a runoff and evaporative fraction benchmark to the ILAMB canon, removed the GFED3 and GFED4 burned area data products. -* Added many more plots to the generic output including the RMSE and the score maps. 
-* The ILAMB core has been enhanced to better handle depths. This has enabled ocean comparisons among others. -* An initial collection of ocean datasets has been assembled in the ``demo/iomb.cfg`` file for ocean benchmarking. -* The plotting phase of ``ilamb-run`` may now be skipped with the ``--skip_plots`` option. -* Relationship overall scores are now available in an image on the main html output page. -* Additional `tutorials `_ have been added to explain these new features. +We are pleased to announce version 2.3 of the ILAMB python +package. Among many bugfixes and improvements we highlight these major +changes: + +* You may observe a large shift in some score values. In this version + we solidified our scoring methodology while writing a `paper + `_ which necesitated + reworking some of the scores. For details, see the linked paper. +* Made a memory optimization pass through the analysis routines. Peak + memory usage and the time at peak was reduced improving performance. +* Restructured the symbolic manipulation of derived variables to + greatly reduce the required memory. +* Moved from using cfunits to cf_units. Both are python wrappers + around the UDUNITS library, but cfunits is stagnant and placed a + lower limit to the version of the netCDF4 python wrappers we could + use. +* The scoring of the interannual variability was missed in the port + from version 1 to 2, we have added the metric. +* The terrestrial water storage anomaly GRACE metric was changed to + compare mean anomaly values over large river basins. For details see + the ILAMB paper. + Funding ------- diff --git a/bin/ilamb-run b/bin/ilamb-run index a98e05fd..03f7b9d5 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -378,6 +378,7 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) sys.stdout.flush() + comm.Barrier() for c in C: if not skip_plots: try: @@ -476,7 +477,8 @@ parser.add_argument('--model_setup', dest="model_setup", type=str, nargs='+',def help='list files model setup information') parser.add_argument('--skip_plots', dest="skip_plots", action="store_true", help='enable to skip the plotting phase') - +parser.add_argument('--rel_only', dest="rel_only", action="store_true", + help='enable only display relative differences in overall scores') args = parser.parse_args() if args.config is None: if rank == 0: @@ -505,7 +507,8 @@ S = Scoreboard(args.config[0], master = rank==0, verbose = not args.quiet, build_dir = args.build_dir[0], - extents = RestrictiveModelExtents(M)) + extents = RestrictiveModelExtents(M), + rel_only = args.rel_only) C = MatchRelationshipConfrontation(S.list()) Cf = FilterConfrontationList(C,args.confront) @@ -520,7 +523,7 @@ if args.logging: if rank == 0: logger.info(" " + " ".join(os.uname())) - for key in ["ILAMB","numpy","matplotlib","netCDF4","cfunits","sympy","mpi4py"]: + for key in ["ILAMB","numpy","matplotlib","netCDF4","cf_units","sympy","mpi4py"]: pkg = __import__(key) try: path = pkg.__path__[0] diff --git a/bin/ilamb-table b/bin/ilamb-table new file mode 100644 index 00000000..374b6abb --- /dev/null +++ b/bin/ilamb-table @@ -0,0 +1,65 @@ +#!/usr/bin/env python +""" +""" +from ILAMB.Scoreboard import Scoreboard +from netCDF4 import Dataset +import os,argparse,sys + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, + help='path to configuration file to use') 
+parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], + help='path of where to save the output') +parser.add_argument('--csv_file', dest="csv", metavar='csv', type=str, nargs=1,default=["table.csv"], + help='destination filename for the table') + +args = parser.parse_args() +if args.config is None: + print "\nError: You must specify a configuration file using the option --config\n" + sys.exit(1) + +S = Scoreboard(args.config[0],verbose=False,build_dir=args.build_dir[0]) + +region = "global" +scalar = "RMSE" +sname = "%s %s" % (scalar,region) +group = "MeanState" +table = {} +unit = {} +for c in S.list(): + for subdir, dirs, files in os.walk(c.output_path): + for fname in files: + if not fname.endswith(".nc"): continue + with Dataset(os.path.join(c.output_path,fname)) as dset: + if group not in dset.groups .keys(): continue + if "scalars" not in dset.groups[group].groups.keys(): continue + grp = dset.groups[group]["scalars"] + if sname not in grp.variables.keys(): continue + var = grp.variables[sname] + if not table.has_key(c.longname): + table[c.longname] = {} + unit [c.longname] = var.units + table[c.longname][dset.name] = var[...] + +# What models have data? +models = [] +for key in table.keys(): + for m in table[key].keys(): + if m not in models: models.append(m) +models.sort() + +# render a table of values in csv format +lines = ",".join(["Name","Units"] + models) +for c in S.list(): + if not table.has_key(c.longname): continue + line = "%s,%s" % (c.longname,unit[c.longname]) + for m in models: + if table[c.longname].has_key(m): + line += ",%g" % (table[c.longname][m]) + else: + line += "," + lines += "\n%s" % line + +with file(args.csv[0],mode="w") as f: + f.write(lines) + diff --git a/demo/ilamb.cfg b/demo/ilamb.cfg index 83692eb2..793227fd 100644 --- a/demo/ilamb.cfg +++ b/demo/ilamb.cfg @@ -284,6 +284,7 @@ skip_iav = True [h2: Terrestrial Water Storage Anomaly] variable = "twsa" alternate_vars = "tws" +derived = "pr-evspsbl-mrro" cmap = "Blues" weight = 5 ctype = "ConfTWSA" @@ -292,6 +293,31 @@ ctype = "ConfTWSA" source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" weight = 25 +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Snow Water Equivalent] +variable = "swe" +alternate_vars = "snw" +cmap = "Blues" +weight = 5 + +[CanSISE] +source = "DATA/swe/CanSISE/swe.nc" +weight = 25 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Permafrost] +variable = "tsl" + +[NSIDC] +ctype = "ConfPermafrost" +source = "DATA/permafrost/NSIDC/NSIDC_0.5x0.5.nc" +y0 = 1970. +yf = 2000. 
+Teps = 273.15 +dmax = 3.5 + ########################################################################### [h1: Radiation and Energy Cycle] diff --git a/doc/install.rst b/doc/install.rst index 7ccfaaf9..84a85043 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -25,7 +25,7 @@ include: * netCDF4_, a python/numpy interface to the netCDF C library (you must have the C library installed) * sympy_, a python library for symbolic mathematics * mpi4py_, a python wrapper around the MPI library (you must have a MPI implementation installed) -* cfunits_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) +* cf_units_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) I have designated that a few of these dependencies are python interfaces to C libraries and so the library must also be installed @@ -271,7 +271,7 @@ Next open the local copy of the file with a editor and search for .. _numpy: https://www.numpy.org/ .. _matplotlib: https://matplotlib.org/ .. _netCDF4: https://github.com/Unidata/netcdf4-python -.. _cfunits: https://bitbucket.org/cfpython/cfunits-python/ +.. _cf_units: https://github.com/SciTools/cf-units .. _basemap: https://github.com/matplotlib/basemap .. _sympy: https://www.sympy.org/ .. _mpi4py: https://pythonhosted.org/mpi4py/ diff --git a/setup.py b/setup.py index e36e7ff2..b189267f 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess import os -VERSION = '2.2' +VERSION = '2.3' def git_version(): """ @@ -97,12 +97,12 @@ def write_version_py(filename=os.path.join('src/ILAMB', 'generated_version.py')) keywords=['benchmarking','earth system modeling','climate modeling','model intercomparison'], packages=['ILAMB'], package_dir={'ILAMB' : 'src/ILAMB'}, - scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor'], - install_requires=['numpy>=1.9.2', + scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor','bin/ilamb-table'], + install_requires=['numpy>=1.11.0', 'matplotlib>=1.4.3', #'basemap>=1.0.7', # basemap is in pypi but broken, need to manually install - 'netCDF4>=1.1.4,<=1.2.4', # upper limit is for cfunits - 'cfunits>=1.1.4', + 'netCDF4>=1.1.4', + 'cf_units>=2.0.0', 'sympy>=0.7.6', 'mpi4py>=1.3.1', 'scipy>=0.9.0'] diff --git a/src/ILAMB/ConfPermafrost.py b/src/ILAMB/ConfPermafrost.py new file mode 100644 index 00000000..92c2ead4 --- /dev/null +++ b/src/ILAMB/ConfPermafrost.py @@ -0,0 +1,223 @@ +from Confrontation import Confrontation +from mpl_toolkits.basemap import Basemap +from Variable import Variable +from Post import ColorBar +import matplotlib.pyplot as plt +from netCDF4 import Dataset +import ilamblib as il +import numpy as np + +class ConfPermafrost(Confrontation): + + def __init__(self,**keywords): + + # Ugly, but this is how we call the Confrontation constructor + super(ConfPermafrost,self).__init__(**keywords) + + # Now we overwrite some things which are different here + self.layout + self.regions = ["global"] + self.layout.regions = self.regions + self.weight = { "Obs Score" : 1., + "Mod Score" : 1. 
} + for page in self.layout.pages: + page.setMetricPriority(["Total Area" , + "Overlap Area", + "Missed Area" , + "Excess Area" , + "Obs Score" , + "Mod Score" , + "Overall Score"]) + + def stageData(self,m): + + obs = Variable(filename = self.source, + variable_name = "permafrost_extent") + + # These parameters may be changed from the configure file + y0 = float(self.keywords.get("y0" ,1970.)) # [yr] beginning year to include in analysis + yf = float(self.keywords.get("yf" ,2000.)) # [yr] end year to include in analysis + dmax = float(self.keywords.get("dmax",3.5)) # [m] consider layers where depth in is the range [0,dmax] + Teps = float(self.keywords.get("Teps",273.15)) # [K] temperature below which we assume permafrost occurs + + t0 = (y0 -1850.)*365. + tf = (yf+1-1850.)*365. + mod = m.extractTimeSeries(self.variable, + initial_time = t0, + final_time = tf) + mod.trim(t = [t0 ,tf ], + lat = [obs.lat.min(),90 ], + d = [0 ,dmax]) + mod = mod.annualCycle() + Tmax = mod.data.max(axis=0) + table = np.zeros(Tmax.shape[-2:]) + table[...] = np.NAN + thaw = np.zeros(table.shape,dtype=bool) + for i in range(mod.depth_bnds.shape[0]-1,-1,-1): + thaw += (Tmax[i]>=Teps) + frozen = np.where((Tmax[i] 60: fsize = 10 - ax.set_ylabel(ylabel,fontsize=fsize) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() - - # add the figure to the HTML layout - if name == "Benchmark" and region == "global": - short_name = short_name.replace("global_","") - page.addFigure(c.longname, - "benchmark_" + short_name, - "Benchmark_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - - # determine the 1D relationship curves - bins = np.linspace(ind_min,ind_max,nbin+1) - delta = 0.1*(bins[1]-bins[0]) - inds = np.digitize(x,bins) - ids = np.unique(inds).clip(1,bins.size-1) - xb = [] - yb = [] - eb = [] - for i in ids: - yt = y[inds==i] - xi = 0.5 - xb.append(xi*bins[i-1]+(1.-xi)*bins[i]) - yb.append(yt.mean()) - try: - eb.append(yt.std()) # for some reason this fails sometimes - except: - eb.append(np.sqrt(((yt-yb[-1])**2).sum()/float(yt.size))) - - if name == "Benchmark": - obs_x = np.asarray(xb) - obs_y = np.asarray(yb) - obs_e = np.asarray(eb) - else: - mod_x = np.asarray(xb) - mod_y = np.asarray(yb) - mod_e = np.asarray(eb) - - # compute and plot the difference - O = np.array(obs_dist.data) - M = np.array(mod_dist.data) - O[np.where(obs_dist.mask)] = 0. - M[np.where(mod_dist.mask)] = 0. 
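Note: in ConfPermafrost.stageData above, model permafrost extent is diagnosed from the annual cycle of soil temperature (tsl): the warmest-month temperature of each layer in the top dmax meters is compared against the threshold Teps read from the configure file. The numpy sketch below uses one plausible simplified criterion (a cell counts as permafrost when at least one layer shallower than dmax never thaws); it is illustrative only, not the exact test used in stageData:

    # permafrost_sketch.py : illustrative only, not the exact ConfPermafrost.stageData test
    import numpy as np

    Teps = 273.15   # [K] temperature below which we assume permafrost, per the configure file
    dmax = 3.5      # [m] only layers in the range [0, dmax] are considered

    # hypothetical warmest-month soil temperature, dimensioned (depth, lat, lon)
    depth = np.array([0.05, 0.25, 0.7, 1.5, 3.0, 6.0])
    Tmax  = 260. + 30.*np.random.rand(depth.size, 4, 8)

    # simplified criterion: a cell is permafrost if some layer shallower than
    # dmax never thaws, i.e. its warmest monthly temperature stays below Teps
    shallow    = depth <= dmax
    permafrost = (Tmax[shallow] < Teps).any(axis=0)
    print "permafrost fraction of cells: %.2f" % permafrost.mean()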
- dif_dist = np.ma.masked_array(M-O,mask=obs_dist.mask*mod_dist.mask) - lim = np.abs(dif_dist).max() - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - pc = ax.pcolormesh(xedges, - yedges, - dif_dist, - cmap = "Spectral_r", - vmin = -lim, - vmax = +lim) - div = make_axes_locatable(ax) - fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), - orientation="vertical", - label="Distribution Difference") - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_diff_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: + + # Grab/create a relationship and scalars group + group = None + if "Relationships" not in results.groups: + group = results.createGroup("Relationships") + else: + group = results.groups["Relationships"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each relationship... + for c in self.relationships: + + # try to get the independent data from the model and obs + try: + ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + ind_name = c.longname.split("/")[0] + ind_min = c.limits["timeint"]["min"]-1e-12 + ind_max = c.limits["timeint"]["max"]+1e-12 + except: + continue + + # Add figures to the html page page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "benchmark_rel_%s" % ind_name, + "Benchmark_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) - - # score the distributions = 1 - Hellinger distance - score = 1.-np.sqrt(((np.sqrt(obs_dist)-np.sqrt(mod_dist))**2).sum())/np.sqrt(2) - vname = '%s Score %s' % (c.longname.split('/')[0],region) - #if vname in scalars.variables: - # scalars.variables[vname][0] = score - #else: - # Variable(name = vname, - # unit = "1", - # data = score).toNetCDF4(results,group="Relationships") - - # plot the 1D curve - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - ax.errorbar(obs_x-delta,obs_y,yerr=obs_e,fmt='-o',color='k') - ax.errorbar(mod_x+delta,mod_y,yerr=mod_e,fmt='-o',color=m.color) - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_func_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "rel_%s" % ind_name, + "MNAME_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) + page.addFigure(c.longname, + "rel_diff_%s" % ind_name, + "MNAME_RNAME_rel_diff_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_func_%s" % ind_name, + "MNAME_RNAME_rel_func_%s.png" % ind_name, + legend = False, + benchmark = False) + + # Analysis over regions + lim_dep = [dep_min,dep_max] + lim_ind = [ind_min,ind_max] + longname = 
c.longname.split('/')[0] + for region in self.regions: + ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + + # Make the plots + _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), + "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) + _plotDistribution(com_dist[0],com_dist[1],com_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) + _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) + _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], + "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), + m.color, + os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) + + # Score the distribution + score = _scoreDistribution(ref_dist[0],com_dist[0]) + sname = "%s Hellinger Distance %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + # Score the functional response + score = _scoreFunction(ref_dist[3],com_dist[3]) + sname = "%s RMSE Score %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + - # score the relationship - i0,i1 = np.where(np.abs(obs_x[:,np.newaxis]-mod_x)<1e-12) - obs_y = obs_y[i0]; mod_y = mod_y[i1] - isnan = np.isnan(obs_y)*np.isnan(mod_y) - obs_y[isnan] = 0.; mod_y[isnan] = 0. 
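Note: the refactored block above delegates the relationship analysis to _buildDistributionResponse, _scoreDistribution and _scoreFunction, whose definitions are not part of this hunk. Judging from the formulas in the removed inline code, the two scalars amount to a Hellinger distance between the normalized joint distributions and an exp(-relative RMSE) score of the binned mean response. A small self-contained numpy sketch with hypothetical helper names and made-up 3x3 distributions:

    # relationship_score_sketch.py : illustrative stand-ins, not the ILAMB helpers themselves
    import numpy as np

    def hellinger_distance(ref_dist, com_dist):
        # both inputs are 2-D response histograms normalized to sum to 1
        return np.sqrt(((np.sqrt(ref_dist) - np.sqrt(com_dist))**2).sum()) / np.sqrt(2.)

    def functional_score(ref_mean, com_mean):
        # exp of the negative relative RMSE between the binned mean responses,
        # matching the formula in the removed inline code
        return np.exp(-np.linalg.norm(ref_mean - com_mean) / np.linalg.norm(ref_mean))

    ref = np.array([[0.2, 0.1, 0.0],
                    [0.1, 0.3, 0.1],
                    [0.0, 0.1, 0.1]])   # hypothetical reference distribution (sums to 1)
    com = np.array([[0.1, 0.1, 0.1],
                    [0.1, 0.2, 0.1],
                    [0.1, 0.1, 0.1]])   # hypothetical model distribution (sums to 1)
    print "Hellinger distance   : %.3f" % hellinger_distance(ref, com)
    print "functional RMSE score: %.3f" % functional_score(ref.sum(axis=0), com.sum(axis=0))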
- score = np.exp(-np.linalg.norm(obs_y-mod_y)/np.linalg.norm(obs_y)) - vname = '%s RMSE Score %s' % (c.longname.split('/')[0],region) - if vname in scalars.variables: - scalars.variables[vname][0] = score - else: - Variable(name = vname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - results.close() + class FileContextManager(): diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py index eff17ebf..6980d8b8 100644 --- a/src/ILAMB/ModelResult.py +++ b/src/ILAMB/ModelResult.py @@ -266,7 +266,6 @@ def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_ti """ from sympy import sympify - from cfunits import Units if expression is None: raise il.VarNotInModel() args = {} units = {} diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py index 502f9f12..809b154d 100644 --- a/src/ILAMB/Post.py +++ b/src/ILAMB/Post.py @@ -223,11 +223,19 @@ def __init__(self,name,title): def __str__(self): r = Regions() - def _sortFigures(figure,priority=["benchmark_timeint","timeint","timeintremap","bias","rmse","benchmark_phase","phase","shift","biasscore","rmsescore","shiftscore","spatial_variance","legend_spatial_variance","spaceint","accumulate","cycle","dtcycle","compcycle","temporal_variance"]): + def _sortFigures(figure): + macro = ["timeint","bias","rmse","iav","phase","shift","variance","spaceint","accumulate","cycle"] val = 1. - for i,pname in enumerate(priority): - if pname == figure.name: val += 2**i - return val + for i,m in enumerate(macro): + if m in figure.name: val += 3**i + if figure.name.startswith("benchmark"): val -= 1. + if figure.name.endswith("score"): val += 1. + if figure.name.startswith("legend"): + if "variance" in figure.name: + val += 1. + else: + val = 0. + return val code = """
    @@ -834,11 +842,14 @@ def head(self): class HtmlLayout(): def __init__(self,pages,cname,years=None): - + self.pages = pages self.cname = cname.replace("/"," / ") if years is not None: - self.cname += " / %d-%d" % (years) + try: + self.cname += " / %d-%d" % (years) + except: + pass for page in self.pages: page.pages = self.pages page.cname = self.cname @@ -1048,7 +1059,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa nvariables = len(variables) maxV = max([len(v) for v in variables]) maxM = max([len(m) for m in models]) - wpchar = 0.1 + wpchar = 0.15 wpcell = 0.19 hpcell = 0.25 w = maxV*wpchar + max(4,nmodels)*wpcell @@ -1085,6 +1096,8 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[0].set_yticklabels(variables[::-1]) ax[0].tick_params('both',length=0,width=0,which='major') ax[0].tick_params(axis='y',pad=10) + ax[0].set_xlim(0,nmodels) + ax[0].set_ylim(0,nvariables) if vcolor is not None: for i,t in enumerate(ax[0].yaxis.get_ticklabels()): t.set_backgroundcolor(vcolor[::-1][i]) @@ -1117,6 +1130,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[i].set_xticklabels(models,rotation=90) ax[i].tick_params('both',length=0,width=0,which='major') ax[i].set_yticks([]) + ax[i].set_xlim(0,nmodels) ax[i].set_ylim(0,nvariables) if rel_only: ax[i].set_yticks (np.arange(nvariables)+0.5) diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py index ba0b2cfb..5fceb929 100644 --- a/src/ILAMB/Scoreboard.py +++ b/src/ILAMB/Scoreboard.py @@ -5,6 +5,7 @@ from ConfEvapFraction import ConfEvapFraction from ConfIOMB import ConfIOMB from ConfDiurnal import ConfDiurnal +from ConfPermafrost import ConfPermafrost import os,re from netCDF4 import Dataset import numpy as np @@ -179,17 +180,19 @@ def ParseScoreboardConfigureFile(filename): "ConfRunoff" : ConfRunoff, "ConfEvapFraction": ConfEvapFraction, "ConfIOMB" : ConfIOMB, - "ConfDiurnal" : ConfDiurnal} + "ConfDiurnal" : ConfDiurnal, + "ConfPermafrost" : ConfPermafrost} class Scoreboard(): """ A class for managing confrontations """ - def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None): + def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None,rel_only=False): if not os.environ.has_key('ILAMB_ROOT'): raise ValueError("You must set the environment variable 'ILAMB_ROOT'") self.build_dir = build_dir + self.rel_only = rel_only if (master and not os.path.isdir(self.build_dir)): os.mkdir(self.build_dir) @@ -272,7 +275,7 @@ def createHtml(self,M,filename="index.html"): has_rel = np.asarray([len(rel.children) for rel in rel_tree.children]).sum() > 0 nav = "" if has_rel: - GenerateRelSummaryFigure(self,M,"%s/overview_rel.png" % self.build_dir) + GenerateRelSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir,rel_only=self.rel_only) nav = """
  • Relationship
  • """ #global global_print_node_string @@ -453,7 +456,7 @@ def createBarCharts(self,M): html = GenerateBarCharts(self.tree,M) def createSummaryFigure(self,M): - GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir) + GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir,rel_only=self.rel_only) def dumpScores(self,M,filename): out = file("%s/%s" % (self.build_dir,filename),"w") @@ -582,7 +585,7 @@ def GenerateTable(tree,M,S,composite=True): BuildHTMLTable(tree,M,S.build_dir) return global_html -def GenerateSummaryFigure(tree,M,filename): +def GenerateSummaryFigure(tree,M,filename,rel_only=False): models = [m.name for m in M] variables = [] @@ -602,96 +605,34 @@ def GenerateSummaryFigure(tree,M,filename): else: data[row,:] = var.score - BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors) - -def GenerateRelSummaryFigure(S,M,figname): - - def _parse(node): - global score,count,rows - if node.level != 5: return - row = "%s vs. %s" % (node.parent.parent.parent.name,node.parent.name) - col = node.name - if row not in rows: rows.append(row) - if not score .has_key(row): score[row] = {} - if not count .has_key(row): count[row] = {} - if not score[row].has_key(col): score[row][col] = 0. - if not count[row].has_key(col): count[row][col] = 0. - score[row][col] += node.score - count[row][col] += 1. - - class rnode(): - def __init__(self,name,level): - self.name = name - self.level = level - self.parent = None - self.score = None - self.children = [] - - - root = S.build_dir - tree = rnode("root",0) - previous_node = tree - current_level = 0 - - for subdir, dirs, files in os.walk(root): - if subdir == root: continue - flat = subdir.replace(root,"").lstrip("/").split("/") - level = len(flat) - name = flat[-1] - child = rnode(name,level) - if level == current_level: - child.parent = previous_node.parent - previous_node.parent.children.append(child) - if level == 3: - for fname in [f for f in files if f.endswith(".nc") and "Benchmark" not in f]: - with Dataset(os.path.join(subdir,fname)) as dset: - if "Relationships" not in dset.groups: continue - grp = dset.groups["Relationships"]["scalars"] - model = dset.name - for var in [var for var in grp.variables.keys() if ("Overall" not in var and - "global" in var)]: - rname = var.split(" ")[0] - hadrel = False - for c in child.children: - if c.name == rname: - rel = c - hadrel = True - if not hadrel: rel = rnode(rname,level+1) - mod = rnode(model,level+2) - mod.score = grp.variables[var][...] 
- mod.parent = rel - rel.children.append(mod) - rel.parent = child - if not hadrel: child.children.append(rel) - elif level > current_level: - child.parent = previous_node - previous_node.children.append(child) - current_level = level - else: - addto = tree - for i in range(level-1): addto = addto.children[-1] - child.parent = addto - addto.children.append(child) - current_level = level - previous_node = child - - global score,count,rows - score = {} - count = {} - rows = [] - TraversePreorder(tree,_parse) - models = [] - for row in rows: - for key in score[row].keys(): - if key not in models: models.append(key) - data = np.zeros((len(rows),len(models))) + BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors,rel_only=rel_only) + +def GenerateRelSummaryFigure(S,M,figname,rel_only=False): + + # reorganize the relationship data + scores = {} + counts = {} + rows = [] + vcolors = [] + for h1 in S.children: + for dep in h1.children: + dname = dep.name.split("/")[0] + for ind in dep.children: + iname = ind.name.split("/")[0] + key = "%s/%s" % (dname,iname) + if scores.has_key(key): + scores[key] += ind.score + counts[key] += 1. + else: + scores[key] = np.copy(ind.score) + counts[key] = 1. + rows .append(key) + vcolors.append(h1.bgcolor) + if len(rows) == 0: return + data = np.ma.zeros((len(rows),len(M))) for i,row in enumerate(rows): - for j,col in enumerate(models): - try: - data[i,j] = score[row][col] / count[row][col] - except: - data[i,j] = np.nan - BenchmarkSummaryFigure(models,rows,data,figname,rel_only=False) + data[i,:] = scores[row] / counts[row] + BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) def GenerateRelationshipTree(S,M): @@ -753,7 +694,6 @@ def GenerateRelationshipTree(S,M): if "Overall Score global" not in grp.variables.keys(): continue h2.score[i] = grp.variables["Overall Score global"][...] - return rel_tree diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 7d1f48b8..fcfa33f9 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -3,7 +3,7 @@ from mpl_toolkits.basemap import Basemap import matplotlib.colors as colors from pylab import get_cmap -from cfunits import Units +from cf_units import Unit import ilamblib as il import Post as post import numpy as np @@ -220,6 +220,17 @@ def __str__(self): return s + def nbytes(self): + r"""Estimate the memory usage of a variable in bytes. + """ + nbytes = 0. + for key in self.__dict__.keys(): + try: + nbytes += self.__dict__[key].nbytes + except: + pass + return nbytes + def integrateInTime(self,**keywords): r"""Integrates the variable over a given time period. 
@@ -286,7 +297,7 @@ def integrateInTime(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_time" if mean: @@ -300,18 +311,18 @@ def integrateInTime(self,**keywords): else: dt = dt.sum(axis=0) np.seterr(over='ignore',under='ignore') - integral /= dt + integral = integral / dt np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("d")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("d")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, lat = self.lat, lat_bnds = self.lat_bnds, @@ -403,7 +414,7 @@ def integrateInDepth(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_depth" if mean: @@ -417,18 +428,18 @@ def integrateInDepth(self,**keywords): else: dz = dz.sum(axis=axis) np.seterr(over='ignore',under='ignore') - integral /= dz + integral = integral / dz np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, time = self.time, time_bnds = self.time_bnds, @@ -521,13 +532,13 @@ def _integrate(var,areas): integral = _integrate(self.data,measure) if mean: np.seterr(under='ignore') - integral /= measure.sum() + integral = integral / measure.sum() np.seterr(under='raise') # handle the name and unit name = self.name + "_integrated_over_space" if region is not None: name = name.replace("space",region) - unit = Units(self.unit) + unit = Unit(self.unit) if mean: # we have already divided thru by the non-masked area in @@ -536,12 +547,12 @@ def _integrate(var,areas): else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m2")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m2")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = np.ma.masked_array(integral), - unit = unit.units, + unit = "%s" % unit, time = self.time, time_bnds = self.time_bnds, depth = self.depth, @@ -710,7 +721,7 @@ def _make_bnds(x): bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) return bnds - assert Units(var.unit) == Units(self.unit) + assert Unit(var.unit) == Unit(self.unit) assert self.temporal == False assert self.ndata == var.ndata assert self.layered == False @@ -752,7 +763,7 @@ def _make_bnds(x): def convert(self,unit,density=998.2): """Convert the variable to a given unit. - We use the UDUNITS library via the cfunits python interface to + We use the UDUNITS library via the cf_units python interface to convert the variable's unit. Additional support is provided for unit conversions in which substance information is required. 
For example, in quantities such as precipitation it @@ -777,53 +788,53 @@ def convert(self,unit,density=998.2): this object with its unit converted """ - src_unit = Units(self.unit) - tar_unit = Units( unit) + if unit is None: return self + src_unit = Unit(self.unit) + tar_unit = Unit( unit) mask = self.data.mask # Define some generic quantities - linear = Units("m") - linear_rate = Units("m s-1") - area_density = Units("kg m-2") - area_density_rate = Units("kg m-2 s-1") - mass_density = Units("kg m-3") - volume_conc = Units("mol m-3") - mass_conc = Units("mol kg-1") - - # cfunits doesn't handle frequently found temperature expressions + linear = Unit("m") + linear_rate = Unit("m s-1") + area_density = Unit("kg m-2") + area_density_rate = Unit("kg m-2 s-1") + mass_density = Unit("kg m-3") + volume_conc = Unit("mol m-3") + mass_conc = Unit("mol kg-1") + + # UDUNITS doesn't handle frequently found temperature expressions synonyms = {"K":"degK", "R":"degR", "C":"degC", "F":"degF"} for syn in synonyms.keys(): - if src_unit.units == syn: src_unit = Units(synonyms[syn]) - if tar_unit.units == syn: tar_unit = Units(synonyms[syn]) + if src_unit.format() == syn: src_unit = Unit(synonyms[syn]) + if tar_unit.format() == syn: tar_unit = Unit(synonyms[syn]) # Do we need to multiply by density? - if ( (src_unit.equivalent(linear_rate) and tar_unit.equivalent(area_density_rate)) or - (src_unit.equivalent(linear ) and tar_unit.equivalent(area_density )) or - (src_unit.equivalent(mass_conc ) and tar_unit.equivalent(volume_conc )) ): + if ( (src_unit.is_convertible(linear_rate) and tar_unit.is_convertible(area_density_rate)) or + (src_unit.is_convertible(linear ) and tar_unit.is_convertible(area_density )) or + (src_unit.is_convertible(mass_conc ) and tar_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') self.data *= density np.seterr(over='raise',under='raise') src_unit *= mass_density # Do we need to divide by density? 
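Note: the convert method is where the move from cfunits to cf_units is most visible: equivalent() becomes is_convertible(), Units.conform() becomes Unit.convert(), and substance information is still supplied by multiplying or dividing by a water density of 998.2 kg m-3. A minimal stand-alone illustration of those cf_units calls, with hypothetical precipitation values:

    # cf_units_sketch.py : minimal illustration of the cf_units calls used by convert()
    import numpy as np
    from cf_units import Unit

    src = Unit("mm d-1")               # a linear rate, e.g. precipitation depth per day
    tar = Unit("kg m-2 s-1")           # the area density rate a benchmark may expect
    print src.is_convertible(tar)      # False: substance information is missing

    rho  = 998.2                       # [kg m-3] density of water, as in convert()
    data = np.array([1.0, 2.5]) * rho  # multiply the data through by the density ...
    src  = src * Unit("kg m-3")        # ... and fold the density into the source unit
    print src.is_convertible(tar)      # True
    print src.convert(data, tar)       # approximately [1.155e-05 2.888e-05]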
- if ( (tar_unit.equivalent(linear_rate) and src_unit.equivalent(area_density_rate)) or - (tar_unit.equivalent(linear ) and src_unit.equivalent(area_density )) or - (tar_unit.equivalent(mass_conc ) and src_unit.equivalent(volume_conc )) ): + if ( (tar_unit.is_convertible(linear_rate) and src_unit.is_convertible(area_density_rate)) or + (tar_unit.is_convertible(linear ) and src_unit.is_convertible(area_density )) or + (tar_unit.is_convertible(mass_conc ) and src_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') - self.data /= density + self.data = self.data / density np.seterr(over='raise',under='raise') - src_unit /= mass_density + src_unit = src_unit / mass_density # Convert units try: - self.data = Units.conform(self.data,src_unit,tar_unit) + self.data = src_unit.convert(self.data,tar_unit) self.data = np.ma.masked_array(self.data,mask=mask) self.unit = unit except: - print "var_name = %s, src_unit = %s, target_unit = %s " % (self.name,src_unit,tar_unit) raise il.UnitConversionError() return self @@ -1599,7 +1610,7 @@ def spatialDistribution(self,var,region="global"): R0 = 1.0 std0 = std0.clip(1e-12) std = std .clip(1e-12) - std /= std0 + std = std/std0 score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) except: std = np.asarray([0.0]) diff --git a/src/ILAMB/__init__.py b/src/ILAMB/__init__.py index f8ea82b9..3bb5124a 100644 --- a/src/ILAMB/__init__.py +++ b/src/ILAMB/__init__.py @@ -1,6 +1,6 @@ __author__ = 'Nathan Collier' -__date__ = 'Nov 2017' -__version__ = '2.2' +__date__ = 'Jun 2018' +__version__ = '2.3' from distutils.version import LooseVersion import platform @@ -10,7 +10,7 @@ "numpy" : "1.9.2", "matplotlib" : "1.4.3", "netCDF4" : "1.1.4", - "cfunits" : "1.1.4", + "cf_units" : "2.0.0", "mpl_toolkits.basemap" : "1.0.7", "sympy" : "0.7.6", "mpi4py" : "1.3.1" diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py index 714209d8..8d77b8f6 100644 --- a/src/ILAMB/constants.py +++ b/src/ILAMB/constants.py @@ -130,6 +130,28 @@ "sidelbl" :"RMSE SCORE", "haslegend" :True } +space_opts["iav"] = { "name" :"Interannual variability", + "cmap" :"Reds", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iav.png", + "sidelbl" :"MODEL INTERANNUAL VARIABILITY", + "haslegend" :True } + +space_opts["iavscore"] = { "name" :"Interannual variability score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iavscore.png", + "sidelbl" :"INTERANNUAL VARIABILITY SCORE", + "haslegend" :True } + space_opts["shift"] = { "name" :"Temporally integrated mean phase shift", "cmap" :"PRGn", "sym" :True, diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 1304be6a..b7cff096 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -3,11 +3,11 @@ from Regions import Regions from netCDF4 import Dataset,num2date,date2num from datetime import datetime -from cfunits import Units +from cf_units import Unit from copy import deepcopy from mpi4py import MPI import numpy as np -import logging +import logging,re logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) @@ -53,7 +53,30 @@ def __str__(self): return "NotLayeredVariable" class NotDatasiteVariable(Exception): def __str__(self): return "NotDatasiteVariable" +def FixDumbUnits(unit): + r"""Try to fix the dumb units people insist on using. 
+ Parameters + ---------- + unit : str + the trial unit + + Returns + ------- + unit : str + the fixed unit + """ + # Various synonyms for 1 + if unit.lower().strip() in ["unitless", + "n/a", + "none"]: unit = "1" + # Remove the C which so often is used to mean carbon but actually means coulomb + tokens = re.findall(r"[\w']+", unit) + for token in tokens: + if token.endswith("C") and Unit(token[:-1]).is_convertible(Unit("g")): + unit = unit.replace(token,token[:-1]) + return unit + def GenerateDistinctColors(N,saturation=0.67,value=0.67): r"""Generates a series of distinct colors. @@ -86,7 +109,7 @@ def ConvertCalendar(t,tbnd=None): This routine converts the representation of time to the ILAMB default: days since 1850-1-1 00:00:00 on a 365-day calendar. This is so we can make comparisons with data from other models and - benchmarks. We use cfunits time conversion capability. + benchmarks. Parameters ---------- @@ -343,71 +366,51 @@ def SympifyWithArgsUnits(expression,args,units): """ from sympy import sympify,postorder_traversal - # The traversal needs that we make units commensurate when - # possible - keys = args.keys() - for i in range(len(keys)): - ikey = keys[i] - for j in range(i+1,len(keys)): - jkey = keys[j] - if Units(units[jkey]).equivalent(Units(units[ikey])): - args [jkey] = Units.conform(args[jkey], - Units(units[jkey]), - Units(units[ikey]), - inplace=True) - units[jkey] = units[ikey] - - # We need to do what sympify does but also with unit - # conversions. So we traverse the expression tree in post order - # and take actions based on the kind of operation being performed. expression = sympify(expression) + + # try to convert all arguments to same units if possible, it + # catches most use cases + keys = args.keys() + for i,key0 in enumerate(keys): + for key in keys[(i+1):]: + try: + Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True) + units[key] = units[key0] + except: + pass + for expr in postorder_traversal(expression): - - if expr.is_Atom: continue - ekey = str(expr) # expression key - + ekey = str(expr) if expr.is_Add: - # Addition will require that all args should be the same - # unit. As a convention, we will try to conform all units - # to the first variable's units. - key0 = None - for arg in expr.args: - key = str(arg) - if not args.has_key(key): continue - if key0 is None: - key0 = key - else: - # Conform these units to the units of the first arg - Units.conform(args[key], - Units(units[key]), - Units(units[key0]), - inplace=True) - units[key] = units[key0] - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = units[key0] + # if there are scalars in the expression, these will not + # be in the units dictionary. Add them and give them an + # implicit unit of 1 + keys = [str(arg) for arg in expr.args] + for key in keys: + if not units.has_key(key): units[key] = "1" - elif expr.is_Pow: + # if we are adding, all arguments must have the same unit. 
+ key0 = keys[0] + for key in keys: + Unit(units[key]).convert(np.ones(1),Unit(units[key0])) + units[key] = units[key0] + units[ekey] = "%s" % (units[key0]) - assert len(expr.args) == 2 # check on an assumption - power = float(expr.args[1]) - args [ekey] = args[str(expr.args[0])]**power - units[ekey] = Units(units[str(expr.args[0])]) - units[ekey] = units[ekey]**power - - elif expr.is_Mul: + elif expr.is_Pow: - unit = Units("1") - for arg in expr.args: - key = str(arg) - if units.has_key(key): unit *= Units(units[key]) - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = Units(unit).formatted() + # if raising to a power, just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = "(%s)%s" % (units[keys[0]],keys[1]) - return args[ekey],units[ekey] + elif expr.is_Mul: + + # just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = " ".join(["(%s)" % units[key] for key in keys if units.has_key(key)]) + return sympify(str(expression),locals=args),units[ekey] + def ComputeIndexingArrays(lat2d,lon2d,lat,lon): """Blah. @@ -635,11 +638,11 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if depth_bnd_name is not None: depth_bnd = grp.variables[depth_bnd_name][...] if dunit is not None: - if not Units(dunit).equivalent(Units("m")): + if not Unit(dunit).is_convertible(Unit("m")): raise ValueError("Non-linear units [%s] of the layered dimension [%s] in %s" % (dunit,depth_name,filename)) - depth = Units.conform(depth,Units(dunit),Units("m"),inplace=True) + depth = Unit(dunit).convert(depth,Unit("m"),inplace=True) if depth_bnd is not None: - depth_bnd = Units.conform(depth_bnd,Units(dunit),Units("m"),inplace=True) + depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) if data_name is not None: data = len(grp.dimensions[data_name]) @@ -701,16 +704,15 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12) v = np.ma.masked_array(v,mask=mask,copy=False) - # handle units problems that cfunits doesn't if "units" in var.ncattrs(): - units = var.units.replace("unitless","1") + units = FixDumbUnits(var.units) else: units = "1" dset.close() return v,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data -def Score(var,normalizer,FC=0.999999): +def Score(var,normalizer): """Remaps a normalized variable to the interval [0,1]. Parameters @@ -726,16 +728,7 @@ def Score(var,normalizer,FC=0.999999): name = name.replace("rmse","rmse_score") name = name.replace("iav" ,"iav_score") np.seterr(over='ignore',under='ignore') - - data = None - if "bias" in var.name or "diff" in var.name: - deno = np.ma.copy(normalizer.data) - if (deno.size - deno.mask.sum()) > 1: deno -= deno.min()*FC - data = np.exp(-np.abs(var.data/deno)) - elif "rmse" in var.name: - data = np.exp(-var.data/normalizer.data) - elif "iav" in var.name: - data = np.exp(-np.abs(var.data/normalizer.data)) + data = np.exp(-np.abs(var.data/normalizer.data)) data[data<1e-16] = 0. np.seterr(over='raise',under='raise') return Variable(name = name, @@ -810,7 +803,7 @@ def _composeGrids(v1,v2): lon = lon_bnds.mean(axis=1) return lat,lon,lat_bnds,lon_bnds -def AnalysisMeanState(ref,com,**keywords): +def AnalysisMeanStateSites(ref,com,**keywords): """Perform a mean state analysis. 
This mean state analysis examines the model mean state in space @@ -848,6 +841,7 @@ def AnalysisMeanState(ref,com,**keywords): the unit to use when displaying output on plots on the HTML page """ + from Variable import Variable regions = keywords.get("regions" ,["global"]) dataset = keywords.get("dataset" ,None) @@ -860,14 +854,14 @@ def AnalysisMeanState(ref,com,**keywords): skip_iav = keywords.get("skip_iav" ,False) skip_cycle = keywords.get("skip_cycle" ,False) ILAMBregions = Regions() - spatial = ref.spatial + spatial = False normalizer = None # Only study the annual cycle if it makes sense if not ref.monthly: skip_cycle = True if ref.time.size < 12: skip_cycle = True - - # We find + if skip_rmse : skip_iav = True + if spatial: lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) @@ -926,14 +920,58 @@ def AnalysisMeanState(ref,com,**keywords): # Compute the bias, RMSE, and RMS maps using the interpolated # quantities bias = REF_timeint.bias(COM_timeint) - bias_score_map = Score(bias,REF_timeint) + cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata) + crms = cREF.rms () + bias_score_map = Score(bias,crms) if spatial: bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask if not skip_rmse: - rmse = REF.rmse(COM) - rms = REF.rms () - rmse_score_map = Score(rmse,rms) - + cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata) + rmse = REF.rmse( COM) + crmse = cREF.rmse(cCOM) + rmse_score_map = Score(crmse,crms) + if not skip_iav: + ref_iav = Variable(name = "centralized %s" % ref.name, unit = ref.unit, + data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask), + time = ref.time, time_bnds = ref.time_bnds, + lat = ref.lat , lat_bnds = ref.lat_bnds, + lon = ref.lon , lon_bnds = ref.lon_bnds, + area = ref.area, ndata = ref.ndata).rms() + com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit, + data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask), + time = com.time, time_bnds = com.time_bnds, + lat = com.lat , lat_bnds = com.lat_bnds, + lon = com.lon , lon_bnds = com.lon_bnds, + area = com.area, ndata = com.ndata).rms() + REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata).rms() + COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata).rms() + iav_score_map = Score(Variable(name = "diff %s" % REF.name, 
unit = REF.unit, + data = (COM_iav.data-REF_iav.data), + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata), + REF_iav) + # The phase shift comes from the interpolated quantities if not skip_cycle: ref_cycle = REF.annualCycle() @@ -948,7 +986,7 @@ def AnalysisMeanState(ref,com,**keywords): ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {} com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {} bias_val = {}; bias_score = {}; rmse_val = {}; rmse_score = {} - space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {} + space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {} ref_union_mean = {}; ref_comp_mean = {} com_union_mean = {}; com_comp_mean = {} for region in regions: @@ -975,6 +1013,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .integrateInSpace(region=region,mean=True) rmse_score [region] = rmse_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) space_std[region],space_cor[region],sd_score[region] = REF_timeint.spatialDistribution(COM_timeint,region=region) else: ref_period_mean[region] = ref_timeint .siteStats(region=region) @@ -995,6 +1035,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .siteStats(region=region) rmse_score [region] = rmse_score_map .siteStats(region=region,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer) ref_period_mean[region].name = "Period Mean (original grids) %s" % (region) ref_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) @@ -1005,6 +1047,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region].name = "RMSE %s" % (region) rmse_score [region].name = "RMSE Score %s" % (region) + if not skip_iav: + iav_score [region].name = "Interannual Variability Score %s" % (region) if not skip_cycle: ref_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) ref_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) @@ -1033,6 +1077,7 @@ def _convert(var,unit): plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val] if not skip_rmse: plot_vars += [rmse,rmse_val] if not skip_cycle: plot_vars += [com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] + if not skip_iav: plot_vars += [com_iav] for var in plot_vars: _convert(var,plot_unit) # Rename and optionally dump out information to netCDF4 files @@ -1064,13 +1109,17 @@ def _convert(var,unit): out_vars.append(shift_score_map) if not skip_rmse: rmse .name = "rmse_map_of_%s" % ref.name - rms .name = "rms_map_of_%s" % ref.name rmse_score_map.name = "rmsescore_map_of_%s" % ref.name out_vars.append(rmse) - out_vars.append(rms ) out_vars.append(rmse_score_map) out_vars.append(rmse_val) out_vars.append(rmse_score) + if not skip_iav: + com_iav.name = "iav_map_of_%s" % ref.name + iav_score_map.name = "iavscore_map_of_%s" % ref.name + out_vars.append(com_iav) + out_vars.append(iav_score_map) + out_vars.append(iav_score) if dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1089,6 +1138,9 @@ def _convert(var,unit): if not skip_cycle: ref_maxt_map.name = "phase_map_of_%s" % ref.name out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle] + if not skip_iav: + 
ref_iav.name = "iav_map_of_%s" % ref.name + out_vars.append(ref_iav) if benchmark_dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1097,124 +1149,303 @@ def _convert(var,unit): var.toNetCDF4(benchmark_dataset,group="MeanState") return - -def AnalysisRelationship(dep_var,ind_var,dataset,rname,**keywords): - """Perform a relationship analysis. - - Expand to provide details of what exactly is done. + +def AnalysisMeanStateSpace(ref,com,**keywords): + """Perform a mean state analysis. + + This mean state analysis examines the model mean state in space + and time. We compute the mean variable value over the time period + at each spatial cell or data site as appropriate, as well as the + bias and RMSE relative to the observational variable. We will + output maps of the period mean values and bias. For each spatial + cell or data site we also estimate the phase of the variable by + finding the mean time of year when the maximum occurs and the + phase shift by computing the difference in phase with respect to + the observational variable. In the spatial dimension, we compute a + spatial mean for each of the desired regions and an average annual + cycle. Parameters ---------- - dep_var : ILAMB.Variable.Variable - the dependent variable - ind_var : ILAMB.Variable.Variable - the independent variable - dataset : netCDF4.Dataset + obs : ILAMB.Variable.Variable + the observational (reference) variable + mod : ILAMB.Variable.Variable + the model (comparison) variable + regions : list of str, optional + the regions overwhich to apply the analysis + dataset : netCDF4.Dataset, optional a open dataset in write mode for caching the results of the analysis which pertain to the model - rname : str - the name of the relationship under study - regions : list of str, optional - a list of units over which to apply the analysis - dep_plot_unit,ind_plot_unit : str, optional - the name of the unit to use in the plots found on the HTML output - - """ - def _extractMaxTemporalOverlap(v1,v2): # should move? - t0 = max(v1.time.min(),v2.time.min()) - tf = min(v1.time.max(),v2.time.max()) - for v in [v1,v2]: - begin = np.argmin(np.abs(v.time-t0)) - end = np.argmin(np.abs(v.time-tf))+1 - v.time = v.time[begin:end] - v.data = v.data[begin:end,...] 
- mask = v1.data.mask + v2.data.mask - v1 = v1.data[mask==0].flatten() - v2 = v2.data[mask==0].flatten() - return v1,v2 - - # grab regions - regions = keywords.get("regions",["global"]) + benchmark_dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the observations + space_mean : bool, optional + disable to compute sums of the variable over space instead of + mean values + table_unit : str, optional + the unit to use when displaying output in tables on the HTML page + plots_unit : str, optional + the unit to use when displaying output on plots on the HTML page + + """ + from Variable import Variable + regions = keywords.get("regions" ,["global"]) + dataset = keywords.get("dataset" ,None) + benchmark_dataset = keywords.get("benchmark_dataset",None) + space_mean = keywords.get("space_mean" ,True) + table_unit = keywords.get("table_unit" ,None) + plot_unit = keywords.get("plot_unit" ,None) + mass_weighting = keywords.get("mass_weighting" ,False) + skip_rmse = keywords.get("skip_rmse" ,False) + skip_iav = keywords.get("skip_iav" ,False) + skip_cycle = keywords.get("skip_cycle" ,False) + ILAMBregions = Regions() + spatial = ref.spatial + + # Convert str types to booleans + if type(skip_rmse) == type(""): + skip_rmse = (skip_rmse.lower() == "true") + if type(skip_iav ) == type(""): + skip_iav = (skip_iav .lower() == "true") + if type(skip_cycle) == type(""): + skip_cycle = (skip_cycle.lower() == "true") - # convert to plot units - dep_plot_unit = keywords.get("dep_plot_unit",dep_var.unit) - ind_plot_unit = keywords.get("ind_plot_unit",ind_var.unit) - if dep_plot_unit is not None: dep_var.convert(dep_plot_unit) - if ind_plot_unit is not None: ind_var.convert(ind_plot_unit) - - # if the variables are temporal, we need to get period means - if dep_var.temporal: dep_var = dep_var.integrateInTime(mean=True) - if ind_var.temporal: ind_var = ind_var.integrateInTime(mean=True) - mask = dep_var.data.mask + ind_var.data.mask - - # analysis over regions - for region in regions: + # Check if we need to skip parts of the analysis + if not ref.monthly : skip_cycle = True + if ref.time.size < 12: skip_cycle = True + if ref.time.size == 1: skip_rmse = True + if skip_rmse : skip_iav = True + name = ref.name + + # Interpolate both reference and comparison to a grid composed of + # their cell breaks + ref.convert(plot_unit) + com.convert(plot_unit) + lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) + REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + unit = REF.unit + area = REF.area + ndata = REF.ndata + + # Find the mean values over the time period + ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) + com_timeint = com.integrateInTime(mean=True).convert(plot_unit) + REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) + COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) + normalizer = REF_timeint.data if mass_weighting else None + + # Report period mean values over all possible representations of + # land + ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) + ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) + com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) + if benchmark_dataset is not None: - lats,lons = ILAMBregions[region] - rmask = (np.outer((dep_var.lat>lats[0])*(dep_var.latlons[0])*(dep_var.lon 1 
else REF_timeint) + bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + if dataset is not None: + bias.name = "bias_map_of_%s" % name + bias.toNetCDF4(dataset,group="MeanState") + bias_score_map.name = "biasscore_map_of_%s" % name + bias_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) + bias_val.name = "Bias %s" % region + bias_val.toNetCDF4(dataset,group="MeanState") + bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + bias_score.name = "Bias Score %s" % region + bias_score.toNetCDF4(dataset,group="MeanState") + del bias,bias_score_map + + # Spatial mean: plots + if REF.time.size > 1: + if benchmark_dataset is not None: + for region in regions: + ref_spaceint = REF.integrateInSpace(region=region,mean=True) + ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + for region in regions: + com_spaceint = COM.integrateInSpace(region=region,mean=True) + com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + com_spaceint.toNetCDF4(dataset,group="MeanState") + + # RMSE: maps, scalars, and scores + if not skip_rmse: + rmse = REF.rmse(COM).convert(plot_unit) + del REF + cCOM = Variable(name = "centralized %s" % name, unit = unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM.area, ndata = COM.ndata).convert(plot_unit) + del COM + crmse = cREF.rmse(cCOM).convert(plot_unit) + del cREF + if skip_iav: del cCOM + rmse_score_map = Score(crmse,REF_iav) + if dataset is not None: + rmse.name = "rmse_map_of_%s" % name + rmse.toNetCDF4(dataset,group="MeanState") + rmse_score_map.name = "rmsescore_map_of_%s" % name + rmse_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) + rmse_val.name = "RMSE %s" % region + rmse_val.toNetCDF4(dataset,group="MeanState") + rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + rmse_score.name = "RMSE Score %s" % region + rmse_score.toNetCDF4(dataset,group="MeanState") + del rmse,crmse,rmse_score_map + + # IAV: maps, scalars, scores + if not skip_iav: + COM_iav = cCOM.rms() + del cCOM + iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit, + data = (COM_iav.data-REF_iav.data), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = area, ndata = ndata), + REF_iav) + if benchmark_dataset is not None: + REF_iav.name = "iav_map_of_%s" % name + REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + COM_iav.name = "iav_map_of_%s" % name + COM_iav.toNetCDF4(dataset,group="MeanState") + iav_score_map.name = "iavscore_map_of_%s" % name + iav_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + iav_score.name = "Interannual Variability Score %s" % region + iav_score.toNetCDF4(dataset,group="MeanState") + del COM_iav,iav_score_map + del REF_iav + + return def ClipTime(v,t0,tf): """Remove time from a variable based on input bounds. 
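Note on the scoring change in the hunks above: the per-cell relation the new code applies can be summarized outside the patch as a small standalone sketch. This is illustrative only and not code from the repository; the input arrays (ref, com) are hypothetical. The period-mean bias and the centralized RMSE are both normalized by the centralized RMS of the reference and mapped to (0,1] with exp(-|x|), which is what the simplified Score() does.

    import numpy as np

    # Hypothetical monthly series at one grid cell (illustrative values only)
    ref = np.array([1.0, 1.4, 0.9, 1.2, 1.1])    # reference (observations)
    com = np.array([1.3, 1.6, 1.0, 1.5, 1.2])    # comparison (model)

    # centralized (anomaly) series, the role played by cREF/cCOM in the patch
    cref = ref - ref.mean()
    ccom = com - com.mean()

    crms  = np.sqrt((cref ** 2).mean())           # centralized RMS of the reference
    bias  = com.mean() - ref.mean()               # period-mean bias
    crmse = np.sqrt(((ccom - cref) ** 2).mean())  # centralized RMSE

    # Score() maps a normalized error to (0,1] via exp(-|error/normalizer|)
    bias_score = np.exp(-np.abs(bias  / crms))
    rmse_score = np.exp(-np.abs(crmse / crms))

An error that is large relative to the natural variability of the reference therefore drives the score toward zero, consistent with the behaviour of the reworked Score() above.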
@@ -1300,10 +1531,10 @@ def MakeComparable(ref,com,**keywords): # If the reference is spatial, the comparison must be if ref.spatial and not com.spatial: - msg = "%s Datasets are not uniformly spatial: " % logstring - msg += "reference = %s, comparison = %s" % (ref.spatial,com.spatial) - logger.debug(msg) - raise VarsNotComparable() + ref = ref.extractDatasites(com.lat,com.lon) + msg = "%s The reference dataset is spatial but the comparison is site-based. " % logstring + msg += "Extracted %s sites from the reference to match the comparison." % ref.ndata + logger.info(msg) # If the reference is layered, the comparison must be if ref.layered and not com.layered: @@ -1383,7 +1614,7 @@ def MakeComparable(ref,com,**keywords): # comparison, coarsen the comparison if np.log10(ref.dt/com.dt) > 0.5: com = com.coarsenInTime(ref.time_bnds,window=window) - + # Time bounds of the reference dataset t0 = ref.time_bnds[ 0,0] tf = ref.time_bnds[-1,1] diff --git a/test/scores_test.csv.gold b/test/scores_test.csv.gold index 6d65ab7b..6fccfb93 100644 --- a/test/scores_test.csv.gold +++ b/test/scores_test.csv.gold @@ -1,9 +1,9 @@ Variables,CLM50r243CRUNCEP,CLM50r243GSWP3 -Biomass,0.595710463937,0.678304573522 -Gross Primary Productivity,0.753476728464,0.741270301037 -Global Net Ecosystem Carbon Balance,0.705400063727,0.863669079462 -Net Ecosystem Exchange,0.524058275106,0.504338904659 -Terrestrial Water Storage Anomaly,0.484015616221,0.470205924215 -Albedo,0.771776381299,0.774604472682 -Surface Air Temperature,0.988457088529,0.990624010352 -Precipitation,0.812343937554,0.824581872315 +Biomass,0.5957104653413856,0.6783045750117078 +Gross Primary Productivity,0.6217211297637607,0.6126273585798891 +Global Net Ecosystem Carbon Balance,0.7054000637266042,0.8636690794621101 +Net Ecosystem Exchange,0.3941918077804778,0.38120476926634617 +Terrestrial Water Storage Anomaly,0.7000653021257858,0.7269702240175762 +Albedo,0.5434663466148166,0.544587485316599 +Surface Air Temperature,0.9256731031865132,0.9314748385926337 +Precipitation,0.7555153501937276,0.7679655805094326 From fb61681b50dc6689d04a6fa400722d1158b018d1 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Fri, 6 Jul 2018 20:42:54 -0600 Subject: [PATCH 08/22] recursively resolve all XML variables in the env_dict. This should be done in the processXmlLib.xml_to_dict --- Tools/pp_config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Tools/pp_config b/Tools/pp_config index d98d8555..3e20cfca 100755 --- a/Tools/pp_config +++ b/Tools/pp_config @@ -275,6 +275,10 @@ def main(options): for tree in xml_trees: xml_processor.xml_to_dict(tree, envDict) + # this should be done in the xml_to_dict! 
+ # resolve all the xml variables + envDict = cesmEnvLib.readXML(case_dir, xml_filenames) + # 'get' user input if options.get: entry_id = options.get[0] From fbe4ed3ee19e2585c8aeef8607e2a9019d1ef0ac Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Mon, 6 Aug 2018 10:49:27 -0600 Subject: [PATCH 09/22] updates for ncar_pylib virtualenv on geyser for python 2.7.14 --- Machines/geyser_modules | 2 ++ Machines/machine_postprocess.xml | 7 ++++--- Makefile | 2 +- Templates/batch_geyser.tmpl | 5 +---- Templates/postprocess.tmpl | 14 ++------------ cesm_utils/cesm_utils/create_postprocess | 3 ++- conform/conform/__init__.py | 0 create_python_env | 14 +++++++------- .../diagnostics/imb/Config/config_diags_iomb.xml | 4 ++-- diagnostics/diagnostics/imb/imb_initialize.py | 4 +++- 10 files changed, 24 insertions(+), 31 deletions(-) create mode 100644 conform/conform/__init__.py diff --git a/Machines/geyser_modules b/Machines/geyser_modules index ddbb2b80..6f9ee684 100755 --- a/Machines/geyser_modules +++ b/Machines/geyser_modules @@ -15,5 +15,7 @@ module load ncl/6.4.0 # use "ncar_pylib --help" to see all options ncar_pylib -c 20180510 ${pp_dir}/cesm-env2 +export PYTHONPATH=${pp_dir}/cesm-env2/lib/python2.7/site-packages + module list diff --git a/Machines/machine_postprocess.xml b/Machines/machine_postprocess.xml index 9d669be5..febc33b3 100644 --- a/Machines/machine_postprocess.xml +++ b/Machines/machine_postprocess.xml @@ -6,18 +6,19 @@ 64 64 srun - /glade/apps/opt/python/2.7.7/gnu-westmere/4.8.2/lib/python2.7/site-packages + f2py ifort -c -g -O2 - -I/glade/apps/opt/netcdf/4.3.3.1/intel/16.0.0/include - -L/glade/apps/opt/netcdf/4.3.3.1/intel/16.0.0/lib -lnetcdff -lnetcdf + -I/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/include + -L/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/lib -lnetcdff -lnetcdf module purge + module load python/2.7.14 module load intel/17.0.1 module load ncarenv module load ncarcompilers diff --git a/Makefile b/Makefile index 81764c37..4f842b11 100644 --- a/Makefile +++ b/Makefile @@ -26,8 +26,8 @@ SUBDIRS = \ timeseries \ conformer \ conform \ - diagnostics \ ilamb \ + diagnostics # MAKECMDGOALS is the make option: make 'clobber' or 'all' TARGET = $(MAKECMDGOALS) diff --git a/Templates/batch_geyser.tmpl b/Templates/batch_geyser.tmpl index feb0d5fe..9525800e 100644 --- a/Templates/batch_geyser.tmpl +++ b/Templates/batch_geyser.tmpl @@ -1,4 +1,4 @@ -#! /bin/bash -1 +#! /bin/bash -l #SBATCH -n {{ pes }} #SBATCH -N {{ nodes }} @@ -12,6 +12,3 @@ #SBATCH -e {{ processName }}.err.%J #SBATCH -o {{ processName }}.out.%J -source /glade/u/apps/opt/slurm_init/init.sh - -export LD_LIBRARY_PATH=/glade/apps/opt/netcdf/4.3.0/gnu/default/lib:$LD_LIBRARY_PATH diff --git a/Templates/postprocess.tmpl b/Templates/postprocess.tmpl index 3f516a62..f5673720 100644 --- a/Templates/postprocess.tmpl +++ b/Templates/postprocess.tmpl @@ -17,33 +17,23 @@ if [ ! -e {{ virtualEnvDir }} ]; then exit fi -## NOTE: the module load order and when the python virtualenv is activated is IMPORTANT! -## Purging the modules first clears all environment variables that might have been set -## by the virtualenv activation. Consequently, in order to ensure a correct environment -## we must activate the virtualenv *after* the purge. - -## 1. purge and load the default system modules that the virtualenv was built with - {% for module in reset_modules %} {{ module }} {% endfor %} -## 2. 
check the processName for ocn_diagnostics_geyser and set the OCNDIAG_DIAGROOTPATH -## to point to the geyser virtualenv in order for the correct za compiled tool - {% if "ocn_diagnostics_geyser" in processName %} pp_geyser_path=`./pp_config --get POSTPROCESS_PATH_GEYSER --value` ./pp_config --set OCNDIAG_DIAGROOTPATH=$pp_geyser_path/ocn_diag {% endif %} -## 3. activate the virtualenv that contains all the non-bootstrapped dependencies +## activate the virtualenv that contains all the non-bootstrapped dependencies cd {{ virtualEnvDir }} echo "Running from virtualenv directory:" pwd . activate -## 4. load the boot-strap modules +## load the boot-strap modules {% for module in modules %} {{ module }} diff --git a/cesm_utils/cesm_utils/create_postprocess b/cesm_utils/cesm_utils/create_postprocess index 7d8cb817..92222331 100755 --- a/cesm_utils/cesm_utils/create_postprocess +++ b/cesm_utils/cesm_utils/create_postprocess @@ -27,6 +27,7 @@ if sys.hexversion < 0x02070000: import argparse import collections import errno +import getpass import itertools import os import platform @@ -560,7 +561,7 @@ def initialize_main(envDict, options, standalone): envDict['PROJECT'] = options.project[0] # set the user name - envDict['USER_NAME'] = os.getlogin() + envDict['USER_NAME'] = getpass.getuser() if options.username: envDict['USER_NAME'] = options.username[0] diff --git a/conform/conform/__init__.py b/conform/conform/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/create_python_env b/create_python_env index 34cc441d..e3d3dc9e 100755 --- a/create_python_env +++ b/create_python_env @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash -l # # script to setup the python virtual environment for postprocessing # @@ -205,14 +205,14 @@ fi #---------------------------------------------------------------------- # compile and install ocn_diag remap shared object (remap.so) program #---------------------------------------------------------------------- -echo "---------------------------------------------------------" -echo "$progname - Compiling ocn diagnostics remap.so" +##echo "---------------------------------------------------------" +##echo "$progname - Compiling ocn diagnostics remap.so" # reads XML and call subprocess f2py -create_f2py_remap --machine $machine -if [ $? -ne 0 ]; then - echo "WARNING: Problem with ocean diagnostics create_f2py_remap in $pp_dir" -fi +##create_f2py_remap --machine $machine +##if [ $? -ne 0 ]; then +## echo "WARNING: Problem with ocean diagnostics create_f2py_remap in $pp_dir" +##fi #---------------------------------------------------------------------- # compile and install ocn_diag zonal average (za) program diff --git a/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml b/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml index 3fde67f3..f45f1751 100644 --- a/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml +++ b/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml @@ -43,12 +43,12 @@ desc="matplotlib backend for generating graphics, should be exported to the environment!" 
> - diff --git a/diagnostics/diagnostics/imb/imb_initialize.py b/diagnostics/diagnostics/imb/imb_initialize.py index a5397b54..0ca6f1d7 100755 --- a/diagnostics/diagnostics/imb/imb_initialize.py +++ b/diagnostics/diagnostics/imb/imb_initialize.py @@ -175,7 +175,9 @@ def expand_batch_vars(envDict, imb_name): except: raise RuntimeError('CLI_OPTIONS must be specified in the imb env xml file.') - diag_root = "{0}_ROOT".format(imb_name.upper()) +## diag_root = "{0}_ROOT".format(imb_name.upper()) + # The ROOT env var should always be set to ILAMB_ROOT regardless of whether running ILAMB or IOMB + diag_root = "ILAMB_ROOT" env_vars = [] env_vars.append("export {0}={1}".format('MPLBACKEND', envDict['MPLBACKEND'])) env_vars.append("export {0}={1}".format(diag_root, envDict[diag_root])) From f75d36aff2bf84b83b51cc597bbe2c936f539910 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 14 Aug 2018 11:34:42 -0600 Subject: [PATCH 10/22] adding SBATCH -m block directive to fix the internode MPI communication issue when submitting jobs to geyser from a geyser login node --- Templates/batch_geyser.tmpl | 2 +- Templates/cylc_batch_geyser.tmpl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Templates/batch_geyser.tmpl b/Templates/batch_geyser.tmpl index 9525800e..55953104 100644 --- a/Templates/batch_geyser.tmpl +++ b/Templates/batch_geyser.tmpl @@ -11,4 +11,4 @@ #SBATCH --mem {{ memory }} #SBATCH -e {{ processName }}.err.%J #SBATCH -o {{ processName }}.out.%J - +#SBATCH -m block diff --git a/Templates/cylc_batch_geyser.tmpl b/Templates/cylc_batch_geyser.tmpl index fc72703f..05c709b1 100644 --- a/Templates/cylc_batch_geyser.tmpl +++ b/Templates/cylc_batch_geyser.tmpl @@ -9,3 +9,4 @@ -e {{ processName }}.err.%J -o {{ processName }}.out.%J --mem {{ memory }} +-m block From 13c9423222b8ebc4131b424a24afa36f1f1f1bab Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 14 Aug 2018 15:40:12 -0600 Subject: [PATCH 11/22] first attempt at using ncar_pylib on cheyenne with clone version 20180705 and DAV in place of geyser --- Config/config_postprocess.xml | 4 +- Machines/cheyenne_modules | 36 ++++---- Machines/dav_modules | 21 +++++ Machines/machine_postprocess.xml | 83 +++++++++++++++---- Machines/machine_postprocess.xsd | 10 +-- Templates/batch_dav.tmpl | 13 +++ Templates/cylc_batch_dav.tmpl | 12 +++ Templates/postprocess.tmpl | 6 +- Tools/pp_config | 2 +- cesm_utils/cesm_utils/create_postprocess | 77 ++++++++++------- create_python_env | 28 +++---- diagnostics/diagnostics/imb/imb_initialize.py | 6 +- 12 files changed, 200 insertions(+), 98 deletions(-) create mode 100755 Machines/dav_modules create mode 100644 Templates/batch_dav.tmpl create mode 100644 Templates/cylc_batch_dav.tmpl diff --git a/Config/config_postprocess.xml b/Config/config_postprocess.xml index ac881a49..446c75de 100644 --- a/Config/config_postprocess.xml +++ b/Config/config_postprocess.xml @@ -43,12 +43,12 @@ desc="post processing directory location on local machine where cesm-env2 python virtualenv is located." 
> - + + 64 + 64 + srun + + f2py + + ifort + -c -g -O2 + -I/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/include + -L/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/lib -lnetcdff -lnetcdf + + + module purge + + + module load python/2.7.14 + module load intel/17.0.1 + module load ncarenv + module load ncarcompilers + module load impi + module load netcdf/4.6.1 + module load nco/4.7.4 + module load ncl/6.4.0 + + + + 32 + 16 + 6 + /glade/p/cesm/amwg/amwg_data + + + 32 + 4 + /glade/p/cesm/pcwg/ice/data + + + 32 + 12 + 6 + /glade/p/cesm/lmwg/diag/lnd_diag_data + + + 32 + 16 + /glade/p/cesm/ + + + 2 + 1 + /glade/p/cesm/lmwg_dev/oleson/ILAMB/ILAMB_all + + + 2 + 1 + /glade/p/cesm/omwg/obs_data/IOMB + + + + 64 64 @@ -77,24 +138,16 @@ module purge - module load ncarenv/1.2 - module load intel/17.0.1 - module load ncarcompilers/0.4.1 - module load mpt/2.15f - module load python/2.7.13 - module load numpy/1.12.0 - module load scipy/0.18.1 - module load mpi4py/2.0.0-mpt - module load pynio/1.4.1 - module load matplotlib/2.0.0 - module load netcdf/4.4.1.1 - module load nco/4.6.2 - module load netcdf4-python/1.2.7 - module load cf_units/1.1.3 + module load python/2.7.14 + module load intel/17.0.1 + module load ncarenv + module load ncarcompilers + module load impi + module load netcdf/4.6.1 + module load nco/4.7.4 module load ncl/6.4.0 - module load pyngl/1.5.0b diff --git a/Machines/machine_postprocess.xsd b/Machines/machine_postprocess.xsd index 3b4c8f0f..d2765b71 100644 --- a/Machines/machine_postprocess.xsd +++ b/Machines/machine_postprocess.xsd @@ -28,7 +28,7 @@ - + @@ -42,7 +42,7 @@ - + @@ -56,7 +56,7 @@ - + @@ -70,7 +70,7 @@ - + @@ -84,7 +84,7 @@ - + diff --git a/Templates/batch_dav.tmpl b/Templates/batch_dav.tmpl new file mode 100644 index 00000000..54a6f5e0 --- /dev/null +++ b/Templates/batch_dav.tmpl @@ -0,0 +1,13 @@ +#! 
/bin/bash -l + +#SBATCH -n {{ pes }} +#SBATCH -N {{ nodes }} +#SBATCH --ntasks-per-node={{ ppn }} +#SBATCH -t {{ wallclock }} +#SBATCH -p dav +#SBATCH -J {{ processName }} +#SBATCH -A {{ project }} +#SBATCH --mem {{ memory }} +#SBATCH -e {{ processName }}.err.%J +#SBATCH -o {{ processName }}.out.%J +#SBATCH -m block diff --git a/Templates/cylc_batch_dav.tmpl b/Templates/cylc_batch_dav.tmpl new file mode 100644 index 00000000..05c709b1 --- /dev/null +++ b/Templates/cylc_batch_dav.tmpl @@ -0,0 +1,12 @@ +-n {{ pes }} +-N {{ nodes }} +--ntasks-per-node={{ ppn }} +-t {{ wallclock }} +-p dav +-J {{ processName }} +-A {{ project }} +-C {{ queue }} +-e {{ processName }}.err.%J +-o {{ processName }}.out.%J +--mem {{ memory }} +-m block diff --git a/Templates/postprocess.tmpl b/Templates/postprocess.tmpl index f5673720..34b64d68 100644 --- a/Templates/postprocess.tmpl +++ b/Templates/postprocess.tmpl @@ -21,9 +21,9 @@ fi {{ module }} {% endfor %} -{% if "ocn_diagnostics_geyser" in processName %} -pp_geyser_path=`./pp_config --get POSTPROCESS_PATH_GEYSER --value` -./pp_config --set OCNDIAG_DIAGROOTPATH=$pp_geyser_path/ocn_diag +{% if "ocn_diagnostics_dav" in processName %} +pp_dav_path=`./pp_config --get POSTPROCESS_PATH_DAV --value` +./pp_config --set OCNDIAG_DIAGROOTPATH=$pp_dav_path/ocn_diag {% endif %} ## activate the virtualenv that contains all the non-bootstrapped dependencies diff --git a/Tools/pp_config b/Tools/pp_config index 3e20cfca..90dd1ed6 100755 --- a/Tools/pp_config +++ b/Tools/pp_config @@ -93,7 +93,7 @@ import jinja2 # global variables _scripts = ['timeseries','averages','regrid','diagnostics','xconform'] -_machines = ['cheyenne','edison','geyser'] +_machines = ['cheyenne','edison','dav'] _comps = ['atm','ice','lnd','ocn'] # ------------------------------------------------------------------------------- diff --git a/cesm_utils/cesm_utils/create_postprocess b/cesm_utils/cesm_utils/create_postprocess index 92222331..383da5e1 100755 --- a/cesm_utils/cesm_utils/create_postprocess +++ b/cesm_utils/cesm_utils/create_postprocess @@ -58,7 +58,7 @@ try: except KeyError: err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH environment variable.' 
\ ' For example on cheyenne: setenv POSTPROCESS_PATH /glade/p/cesm/postprocessing_ch' \ - ' In addition, for geyser support: setenv POSTPROCESS_PATH_GEYSER /glade/p/cesm/postprocessing_geyser') + ' In addition, for DAV support: setenv POSTPROCESS_PATH_DAV /glade/p/cesm/postprocessing_dav') raise OSError(err_msg) cesm_pp_path = os.environ["POSTPROCESS_PATH"] @@ -271,7 +271,9 @@ def read_machine_xml(machineName, xmlFile): # get the timeseries pes first tseries_pes = xmlmachine.find('timeseries_pes') machine['timeseries_pes'] = tseries_pes.text - machine['timeseries_queue'] = tseries_pes.get('queue').lower() + machine['timeseries_queue'] = '' + if 'queue' in tseries_pes.attrib: + machine['timeseries_queue'] = tseries_pes.get('queue').lower() machine['timeseries_ppn'] = tseries_pes.get('pes_per_node').lower() machine['timeseries_wallclock'] = tseries_pes.get('wallclock').lower() machine['timeseries_nodes'] = '' @@ -285,7 +287,9 @@ def read_machine_xml(machineName, xmlFile): # get the conform pes xconform_pes = xmlmachine.find('xconform_pes') machine['xconform_pes'] = xconform_pes.text - machine['conform_queue'] = xconform_pes.get('queue').lower() + machine['conform_queue'] = '' + if 'queue' in xconform_pes.attrib: + machine['conform_queue'] = xconform_pes.get('queue').lower() machine['conform_ppn'] = xconform_pes.get('pes_per_node').lower() machine['conform_wallclock'] = xconform_pes.get('wallclock').lower() machine['conform_nodes'] = '' @@ -319,7 +323,9 @@ def read_machine_xml(machineName, xmlFile): avg = comp.find('averages_pes') if avg is not None: machine['{0}_averages_pes'.format(compName)] = avg.text - machine['{0}_averages_queue'.format(compName)] = avg.get('queue').lower() + machine['{0}_averages_queue'.format(compName)] = '' + if 'queue' in avg.attrib: + machine['{0}_averages_queue'.format(compName)] = avg.get('queue').lower() machine['{0}_averages_ppn'.format(compName)] = avg.get('pes_per_node').lower() machine['{0}_averages_wallclock'.format(compName)] = avg.get('wallclock').lower() machine['{0}_averages_nodes'.format(compName)] = '' @@ -333,7 +339,9 @@ def read_machine_xml(machineName, xmlFile): diags = comp.find('diagnostics_pes') if diags is not None: machine['{0}_diagnostics_pes'.format(compName)] = diags.text - machine['{0}_diagnostics_queue'.format(compName)] = diags.get('queue').lower() + machine['{0}_diagnostics_queue'.format(compName)] = '' + if 'queue' in diags.attrib: + machine['{0}_diagnostics_queue'.format(compName)] = diags.get('queue').lower() machine['{0}_diagnostics_ppn'.format(compName)] = diags.get('pes_per_node').lower() machine['{0}_diagnostics_wallclock'.format(compName)] = diags.get('wallclock').lower() machine['{0}_diagnostics_nodes'.format(compName)] = '' @@ -346,7 +354,9 @@ def read_machine_xml(machineName, xmlFile): init = comp.find('initialize_pes') if init is not None: machine['{0}_initialize_pes'.format(compName)] = init.text - machine['{0}_initialize_queue'.format(compName)] = init.get('queue').lower() + machine['{0}_initialize_queue'.format(compName)] = '' + if 'queue' in init.attrib: + machine['{0}_initialize_queue'.format(compName)] = init.get('queue').lower() machine['{0}_initialize_ppn'.format(compName)] = init.get('pes_per_node').lower() machine['{0}_initialize_wallclock'.format(compName)] = init.get('wallclock').lower() machine['{0}_initialize_nodes'.format(compName)] = '' @@ -359,7 +369,9 @@ def read_machine_xml(machineName, xmlFile): regrid = comp.find('regrid_pes') if regrid is not None: machine['{0}_regrid_pes'.format(compName)] = 
regrid.text - machine['{0}_regrid_queue'.format(compName)] = regrid.get('queue').lower() + machine['{0}_regrid_queue'.format(compName)] = '' + if 'queue' in regrid.attrib: + machine['{0}_regrid_queue'.format(compName)] = regrid.get('queue').lower() machine['{0}_regrid_ppn'.format(compName)] = regrid.get('pes_per_node').lower() machine['{0}_regrid_wallclock'.format(compName)] = regrid.get('wallclock').lower() machine['{0}_regrid_nodes'.format(compName)] = '' @@ -608,13 +620,13 @@ def main(options): if not envDict['MACH']: raise OSError('create_postprocess ERROR: hostname "{0}" is not currently supported. Exiting...'.format(hostname)) - # check if env POSTPROCESS_PATH_GEYSER needs to be set - if (envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'geyser'): + # check if env POSTPROCESS_PATH_DAV needs to be set + if (envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'dav'): try: - envDict["POSTPROCESS_PATH_GEYSER"] = os.environ["POSTPROCESS_PATH_GEYSER"] + envDict["POSTPROCESS_PATH_DAV"] = os.environ["POSTPROCESS_PATH_DAV"] except KeyError: - err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH_GEYSER environment variable.' \ - ' For example, setenv POSTPROCESS_PATH_GEYSER /glade/p/cesm/postprocessing_geyser') + err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH_DAV environment variable.' \ + ' For example, setenv POSTPROCESS_PATH_DAV /glade/p/cesm/postprocessing_dav') raise OSError(err_msg) # make the appropriate dirs in the caseroot @@ -875,16 +887,17 @@ def main(options): imb_env_vars='{% for env in imb_env_vars %}\n{{ env }}\n{% endfor %}', imb_options='{{ imb_options }}') - # check if machine is cheyenne then create a set of geyser submission scripts + # check if machine is cheyenne then create a set of dav submission scripts if envDict['MACH'] == 'cheyenne': - hostname = 'geyser' + hostname = 'dav' envDict['MACH'] = cesmEnvLib.get_machine_name(hostname, '{0}/Machines/machine_postprocess.xml'.format(envDict['POSTPROCESS_PATH'])) - pp_geyser = os.environ["POSTPROCESS_PATH_GEYSER"] + ##pp_dav = os.environ["POSTPROCESS_PATH_DAV"] + pp_dav = os.environ["POSTPROCESS_PATH"] # get the machine dependent variables, modules and mpi run command in a dictionary machine = dict() machine = read_machine_xml(machineName=envDict['MACH'], - xmlFile='{0}/Machines/machine_postprocess.xml'.format(pp_geyser)) + xmlFile='{0}/Machines/machine_postprocess.xml'.format(pp_dav)) # define the template files for the batch scripts batch_tmpl = 'batch_{0}.tmpl'.format(envDict['MACH']) @@ -892,9 +905,9 @@ def main(options): # generate the timeseries batch submit script from template files postProcessCmd = 'cesm_tseries_generator.py' - processName = 'timeseries_geyser' + processName = 'timeseries_dav' outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'],processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['timeseries_pes'], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -914,10 +927,10 @@ def main(options): # generate the xconform batch submit script from template files postProcessCmd = 'cesm_conform_generator.py' - processName = 'xconform_geyser' + processName = 'xconform_dav' outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'],processName) xconform_tmpl = 'xconform.tmpl' - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['xconform_pes'], batchTmpl=batch_tmpl, runTmpl=xconform_tmpl, postProcessCmd=postProcessCmd, @@ -937,10 +950,10 @@ def main(options): # generate the iconform batch submit script from 
template files postProcessCmd = 'cesm_conform_initialize.py' - processName = 'iconform_geyser' + processName = 'iconform_dav' outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'],processName) iconform_tmpl = 'iconform.tmpl' - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['xconform_pes'], batchTmpl=batch_tmpl, runTmpl=iconform_tmpl, postProcessCmd=postProcessCmd, @@ -962,9 +975,9 @@ def main(options): for comp in compList: # generate the averages batch submit script postProcessCmd = '{0}_avg_generator.py'.format(comp) - processName = '{0}_averages_geyser'.format(comp) + processName = '{0}_averages_dav'.format(comp) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['{0}_averages_pes'.format(comp)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -984,9 +997,9 @@ def main(options): # generate the diagnostics batch submit script postProcessCmd = '{0}_diags_generator.py'.format(comp) - processName = '{0}_diagnostics_geyser'.format(comp) + processName = '{0}_diagnostics_dav'.format(comp) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['{0}_diagnostics_pes'.format(comp)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -1008,9 +1021,9 @@ def main(options): for comp in regridList: # generate the regrid batch submit script postProcessCmd = '{0}_regrid_generator.py'.format(comp) - processName = '{0}_regrid_geyser'.format(comp) + processName = '{0}_regrid_dav'.format(comp) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(pp_geyser, + create_batch(pp_dav, pes=machine['{0}_regrid_pes'.format(comp)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -1032,9 +1045,9 @@ def main(options): for imb in imbList: # generate the serial script that sets up the imb config file. postProcessCmd = 'imb_initialize.py' - processName = '{0}_initialize_geyser'.format(imb) + processName = '{0}_initialize_dav'.format(imb) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['{0}_initialize_pes'.format(imb)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -1062,9 +1075,9 @@ def main(options): # variables and run imb_initialize multiple times, each time # write it out to imb_diagnostics. postProcessCmd = 'imb_diags_generator.py' - processName = '{0}_diagnostics_geyser'.format(imb) + processName = '{0}_diagnostics_dav'.format(imb) outFile = '{0}/{1}.tmpl'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['{0}_diagnostics_pes'.format(imb)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, diff --git a/create_python_env b/create_python_env index e3d3dc9e..c4033164 100755 --- a/create_python_env +++ b/create_python_env @@ -137,14 +137,12 @@ fi #---------------------------------------------------------------------- env="${pp_dir}/cesm-env2" echo $env -if [ -f $env ] && [ ${machine} != geyser ]; then +if [ ! -d $env ]; then status="ERROR" - info="$progname - ${pp_dir}/cesm-env2 virtual environment already exists. -It is only necessary to create the virtual environment once for a given machine. -All post processing scripts residing in a CASE directory will activate and deactivate -the virtual environment as necessary. 
+ info="$progname - ${pp_dir}/cesm-env2 virtual environment does not exist. +Check the $module_script for the correct ncar_pylib virtualenv clone command. +If a new or updated virtual environment needs to be created then follow these steps: -If a new or updated virtual environment needs to be created then following these steps: >cd ${pp_dir} >make clobber >make clobber-env @@ -162,17 +160,17 @@ cd $pp_dir # create the virtual environment. Makefile checks to see if it is # already setup, so only done once per case. #---------------------------------------------------------------------- -echo "$progname - making virtual environment in ${pp_dir}/cesm-env2." -if [ ${machine} != geyser ]; then - make env - if [ $? -ne 0 ]; then - echo "ERROR: Unable to create virtual environment in ${pp_dir}/cesm-env2. Exiting..." - exit 1 - fi -fi +##echo "$progname - making virtual environment in ${pp_dir}/cesm-env2." +##if [ ${machine} != geyser ]; then +## make env +## if [ $? -ne 0 ]; then +## echo "ERROR: Unable to create virtual environment in ${pp_dir}/cesm-env2. Exiting..." +## exit 1 +## fi +##fi #---------------------------------------------------------------------- -# activate it for this script +# activate virtualenv for remainder of this script #---------------------------------------------------------------------- echo "$progname - activating virtual environment in ${pp_dir}/cesm-env2." . cesm-env2/bin/activate diff --git a/diagnostics/diagnostics/imb/imb_initialize.py b/diagnostics/diagnostics/imb/imb_initialize.py index 0ca6f1d7..8d217a82 100755 --- a/diagnostics/diagnostics/imb/imb_initialize.py +++ b/diagnostics/diagnostics/imb/imb_initialize.py @@ -206,9 +206,9 @@ def expand_batch_vars(envDict, imb_name): print(' {0} - {1}'.format(e.cmd, e.output)) - # create a template and batch for geyser slurm - if envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'geyser': - hostname = 'geyser' + # create a template and batch for DAV slurm + if envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'dav': + hostname = 'dav' template_filename = '{0}_diagnostics_{1}.tmpl'.format(imb_name, hostname) templateLoader = jinja2.FileSystemLoader( searchpath='{0}'.format(envDict["CASEROOT"]) ) templateEnv = jinja2.Environment( loader=templateLoader ) From aec3b1f3b09ea91a4bc57654991ba7c0c1a3228a Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 14 Aug 2018 17:09:30 -0600 Subject: [PATCH 12/22] update to get hostname=dav for DAV cluster machines --- Templates/iconform.tmpl | 2 +- Templates/postprocess.tmpl | 2 +- Templates/xconform.tmpl | 2 +- cesm_utils/cesm_utils/cesmEnvLib.py | 2 ++ 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Templates/iconform.tmpl b/Templates/iconform.tmpl index 63690122..46898013 100644 --- a/Templates/iconform.tmpl +++ b/Templates/iconform.tmpl @@ -13,7 +13,7 @@ if [ ! -e {{ virtualEnvDir }} ]; then echo "CESM {{ processName }} exiting due to non-existant python virtual environment in" echo " {{ virtualEnvDir }}" echo "You must first run:" - echo "$SRCROOT/postprocessing/create_python_env.sh -machine [machine]" + echo "$SRCROOT/postprocessing/create_python_env -machine [machine]" echo "*************************************************************************************" exit fi diff --git a/Templates/postprocess.tmpl b/Templates/postprocess.tmpl index 34b64d68..63d8caf4 100644 --- a/Templates/postprocess.tmpl +++ b/Templates/postprocess.tmpl @@ -12,7 +12,7 @@ if [ ! 
-e {{ virtualEnvDir }} ]; then echo "CESM {{ processName }} exiting due to non-existant python virtual environment in" echo " {{ virtualEnvDir }}" echo "You must first run:" - echo "$POSTPROCESS_PATH/create_python_env.sh -machine [machine]" + echo "$POSTPROCESS_PATH/create_python_env -machine [machine]" echo "*************************************************************************************" exit fi diff --git a/Templates/xconform.tmpl b/Templates/xconform.tmpl index d73e1565..4430bbd0 100644 --- a/Templates/xconform.tmpl +++ b/Templates/xconform.tmpl @@ -13,7 +13,7 @@ if [ ! -e {{ virtualEnvDir }} ]; then echo "CESM {{ processName }} exiting due to non-existant python virtual environment in" echo " {{ virtualEnvDir }}" echo "You must first run:" - echo "$SRCROOT/postprocessing/create_python_env.sh -machine [machine]" + echo "$SRCROOT/postprocessing/create_python_env -machine [machine]" echo "*************************************************************************************" exit fi diff --git a/cesm_utils/cesm_utils/cesmEnvLib.py b/cesm_utils/cesm_utils/cesmEnvLib.py index c2d7f039..c99d5b24 100755 --- a/cesm_utils/cesm_utils/cesmEnvLib.py +++ b/cesm_utils/cesm_utils/cesmEnvLib.py @@ -203,6 +203,8 @@ def get_hostname(): index = hostname.find(".") if index > 0: hostname = hostname[0:index] + else: + hostname = re.split('(\d+)',hostname)[0] return hostname From 3c83f4d1c7cccd08a4921bc1b07ecd3329a17eff Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 14 Aug 2018 17:13:46 -0600 Subject: [PATCH 13/22] update for DAV --- cesm_utils/cesm_utils/create_postprocess | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cesm_utils/cesm_utils/create_postprocess b/cesm_utils/cesm_utils/create_postprocess index 383da5e1..a62084b8 100755 --- a/cesm_utils/cesm_utils/create_postprocess +++ b/cesm_utils/cesm_utils/create_postprocess @@ -891,8 +891,7 @@ def main(options): if envDict['MACH'] == 'cheyenne': hostname = 'dav' envDict['MACH'] = cesmEnvLib.get_machine_name(hostname, '{0}/Machines/machine_postprocess.xml'.format(envDict['POSTPROCESS_PATH'])) - ##pp_dav = os.environ["POSTPROCESS_PATH_DAV"] - pp_dav = os.environ["POSTPROCESS_PATH"] + pp_dav = os.environ["POSTPROCESS_PATH_DAV"] # get the machine dependent variables, modules and mpi run command in a dictionary machine = dict() From 9e13eb305d66e13a3ad48af13369bd64272f8c60 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Thu, 16 Aug 2018 16:32:21 -0600 Subject: [PATCH 14/22] updates to get cheyenne ncar_pylib virtualenv working with clone 20180705 --- Machines/cheyenne_modules | 2 +- Machines/machine_postprocess.xml | 2 +- Templates/batch_cheyenne.tmpl | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Machines/cheyenne_modules b/Machines/cheyenne_modules index ff3cf923..592e9a7b 100755 --- a/Machines/cheyenne_modules +++ b/Machines/cheyenne_modules @@ -6,7 +6,7 @@ module load python/2.7.14 module load intel/17.0.1 module load ncarenv module load ncarcompilers -module load impi +module load mpt/2.15f module load netcdf/4.6.1 module load nco/4.7.4 module load ncl/6.4.0 diff --git a/Machines/machine_postprocess.xml b/Machines/machine_postprocess.xml index 9f36ac69..13c2c282 100644 --- a/Machines/machine_postprocess.xml +++ b/Machines/machine_postprocess.xml @@ -144,7 +144,7 @@ module load intel/17.0.1 module load ncarenv module load ncarcompilers - module load impi + module load mpt/2.15f module load netcdf/4.6.1 module load nco/4.7.4 module load ncl/6.4.0 diff --git a/Templates/batch_cheyenne.tmpl 
b/Templates/batch_cheyenne.tmpl index 6168cd6e..3bfb6d65 100644 --- a/Templates/batch_cheyenne.tmpl +++ b/Templates/batch_cheyenne.tmpl @@ -1,4 +1,4 @@ -#! /usr/bin/env/bash +#!/bin/bash #PBS -N {{ processName }} #PBS -q {{ queue }} @@ -6,10 +6,8 @@ #PBS -l walltime={{ wallclock }} #PBS -A {{ project }} -. /glade/u/apps/ch/opt/lmod/7.2.1/lmod/lmod/init/bash +source /etc/profile.d/modules.sh -export I_MPI_DEVICE=rdma export MPI_UNBUFFERED_STDIO=true export TMPDIR=$TMPDIR -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/glade/u/apps/ch/opt/pythonpkgs/2.7/cf_units/1.1.3/gnu/6.2.0/lib From f274586b4b5056ff442615041581d692a07b9be7 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Thu, 16 Aug 2018 16:35:31 -0600 Subject: [PATCH 15/22] Squashed 'ilamb/ilamb/' changes from f46c647..5f35ec3 5f35ec3 cf_units requires more recent numpy but does not have it in their requirements 4bf570e 2.3 release f1ce475 doc cleanup from cfunits move 3c88a3a move from cfunits to cf_units 515cfb1 added initial logic will excludes outliers when computing functional relationships eed6fb0 fixed a bug which would cause a crash if non-string global attributes were found in the netCDF files. Thanks to Declan Valters for the report 9197e39 added plot limits to the summary images 9af8e66 added a derived expression for twsa and required code support 919c5d6 updated the gold scores, fixed an error in handling the bias score in biomass, large differences possible 69d7d85 force a mask 0ab4b6c fixed some unit issues 70d6042 moved where some scalars are saved in the analysis 3348b64 neglected to put writes where we can be sure datasets are defined f59b823 restored site analysis, needs revamped 2c3136a finished memory refactor on the spatial analysis 78ce3e5 removed a duplicated function definition 0fb6a7f first pass at memory optimzation 40fa2e7 added a pass functionality to unit conversions 3968d67 added a nbytes routine to determine the size of Variables b152fd9 rework of sympifywithargs to reduce memory usage a9cefa0 changes to scores based on added iav 922d9ac removed numpy restriction cb7f921 removed /= because it is causing problems with numpy 1.14 c454c2d restored iav to the analysis b82ffa0 Merge branch 'ncollier/grace-change' 3673d67 changes in gold score to reflect methodology change 2420b0c change of directory name to reflect a last name and not a acronym 0a7636c added an attempt to purge C for carbon in units 23fdf42 added site plots to runoff and fixed twsa/runoff plots to show by basin in the all models tab acbdaca small fix to location of a dataset 49d8eab added a permafrost extent comparison 67c86b4 added a snow water equivalent dataset d110066 Merge branch 'master' of bitbucket.org:ncollier/ilamb 75a417a numpy 1.14.0 was found to give an error when using the /= operator on masked arrays. For now limiting the upper limit on the version f6c7692 Relaxed a requirement that the reference and comparison datasets must be uniformly spatial. This allows us to use a spatial observational dataset to compare against models run at sites. 
d81f144 added a reference explaining why we have changed the methodology 7a2e751 changes to grace methodology based on Swenson's suggestions f688075 missed a barrier which can be problematic when trying to render composite plots and compute relationships 3fae4d8 wrong import d58c5e4 changes in gold scores due to method change 4997958 rework and abstraction of relationship analysis, fixed warnings in plot generation e5d50a4 added more horizontal width per variable label length d19c523 added a --rel_only option to ilamb-run that will render summary plots without absolute scores 2968f2e safety if no relationships exist e4857ed fixed relationship plot ce53afd first pass at a tool for extracting scalar scores from files in csv format 4aa220d shifts in scoring of bias and rmse 3648557 conditional if no relationships exist git-subtree-dir: ilamb/ilamb git-subtree-split: 5f35ec3e07f7ea8441429ea0c3e864d1aa8340e1 --- README.rst | 41 ++- bin/ilamb-run | 9 +- bin/ilamb-table | 65 ++++ demo/ilamb.cfg | 26 ++ doc/install.rst | 4 +- setup.py | 10 +- src/ILAMB/ConfPermafrost.py | 223 +++++++++++++ src/ILAMB/ConfRunoff.py | 44 ++- src/ILAMB/ConfTWSA.py | 243 ++++++++------ src/ILAMB/Confrontation.py | 491 +++++++++++++++------------- src/ILAMB/ModelResult.py | 1 - src/ILAMB/Post.py | 28 +- src/ILAMB/Scoreboard.py | 130 ++------ src/ILAMB/Variable.py | 101 +++--- src/ILAMB/__init__.py | 6 +- src/ILAMB/constants.py | 22 ++ src/ILAMB/ilamblib.py | 633 ++++++++++++++++++++++++------------ test/scores_test.csv.gold | 16 +- 18 files changed, 1394 insertions(+), 699 deletions(-) create mode 100644 bin/ilamb-table create mode 100644 src/ILAMB/ConfPermafrost.py diff --git a/README.rst b/README.rst index 86120b54..ebbe75ee 100644 --- a/README.rst +++ b/README.rst @@ -31,27 +31,38 @@ Useful Information * `CLM `_ - land comparison against 3 CLM versions and 2 forcings * `CMIP5 `_ - land comparison against a collection of CMIP5 models * `IOMB `_ - ocean comparison against a few ocean models - + +* Paper `preprint `_ which + details the design and methodology employed in the ILAMB package * If you find the package or the ouput helpful in your research or development efforts, we kindly ask you to cite the following reference (DOI:10.18139/ILAMB.v002.00/1251621). -ILAMB 2.2 Release +ILAMB 2.3 Release ----------------- -We are pleased to announce version 2.2 of the ILAMB python package. Among many small bugfixes and enhancements, the new version contains the following new features: - -* A new installed command ``ilamb-fetch`` has been included which can be run to automatically download the observational datasets. Running this command after the data has been downloaded will check your collection for updates and consistency. -* A new installed command ``ilamb-doctor`` has been included which can be run with options similar to ``ilamb-run`` to help identify which values a particular configure file needs in order to run. -* ILAMB will now check the spatial extents of all the models present in the current run and clip away to the largest shared extent. This allows ILAMB to be applied to regional models. -* User-defined regions can now be added at runtime either by specifying latitude/longitude bounds, or a mask in a netCDF4 file. For specifics, consult the regions `tutorial `_. -* Added a runoff and evaporative fraction benchmark to the ILAMB canon, removed the GFED3 and GFED4 burned area data products. -* Added many more plots to the generic output including the RMSE and the score maps. 
-* The ILAMB core has been enhanced to better handle depths. This has enabled ocean comparisons among others. -* An initial collection of ocean datasets has been assembled in the ``demo/iomb.cfg`` file for ocean benchmarking. -* The plotting phase of ``ilamb-run`` may now be skipped with the ``--skip_plots`` option. -* Relationship overall scores are now available in an image on the main html output page. -* Additional `tutorials `_ have been added to explain these new features. +We are pleased to announce version 2.3 of the ILAMB python +package. Among many bugfixes and improvements we highlight these major +changes: + +* You may observe a large shift in some score values. In this version + we solidified our scoring methodology while writing a `paper + `_ which necesitated + reworking some of the scores. For details, see the linked paper. +* Made a memory optimization pass through the analysis routines. Peak + memory usage and the time at peak was reduced improving performance. +* Restructured the symbolic manipulation of derived variables to + greatly reduce the required memory. +* Moved from using cfunits to cf_units. Both are python wrappers + around the UDUNITS library, but cfunits is stagnant and placed a + lower limit to the version of the netCDF4 python wrappers we could + use. +* The scoring of the interannual variability was missed in the port + from version 1 to 2, we have added the metric. +* The terrestrial water storage anomaly GRACE metric was changed to + compare mean anomaly values over large river basins. For details see + the ILAMB paper. + Funding ------- diff --git a/bin/ilamb-run b/bin/ilamb-run index a98e05fd..03f7b9d5 100644 --- a/bin/ilamb-run +++ b/bin/ilamb-run @@ -378,6 +378,7 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) sys.stdout.flush() + comm.Barrier() for c in C: if not skip_plots: try: @@ -476,7 +477,8 @@ parser.add_argument('--model_setup', dest="model_setup", type=str, nargs='+',def help='list files model setup information') parser.add_argument('--skip_plots', dest="skip_plots", action="store_true", help='enable to skip the plotting phase') - +parser.add_argument('--rel_only', dest="rel_only", action="store_true", + help='enable only display relative differences in overall scores') args = parser.parse_args() if args.config is None: if rank == 0: @@ -505,7 +507,8 @@ S = Scoreboard(args.config[0], master = rank==0, verbose = not args.quiet, build_dir = args.build_dir[0], - extents = RestrictiveModelExtents(M)) + extents = RestrictiveModelExtents(M), + rel_only = args.rel_only) C = MatchRelationshipConfrontation(S.list()) Cf = FilterConfrontationList(C,args.confront) @@ -520,7 +523,7 @@ if args.logging: if rank == 0: logger.info(" " + " ".join(os.uname())) - for key in ["ILAMB","numpy","matplotlib","netCDF4","cfunits","sympy","mpi4py"]: + for key in ["ILAMB","numpy","matplotlib","netCDF4","cf_units","sympy","mpi4py"]: pkg = __import__(key) try: path = pkg.__path__[0] diff --git a/bin/ilamb-table b/bin/ilamb-table new file mode 100644 index 00000000..374b6abb --- /dev/null +++ b/bin/ilamb-table @@ -0,0 +1,65 @@ +#!/usr/bin/env python +""" +""" +from ILAMB.Scoreboard import Scoreboard +from netCDF4 import Dataset +import os,argparse,sys + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, + help='path to configuration file to use') 
+parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], + help='path of where to save the output') +parser.add_argument('--csv_file', dest="csv", metavar='csv', type=str, nargs=1,default=["table.csv"], + help='destination filename for the table') + +args = parser.parse_args() +if args.config is None: + print "\nError: You must specify a configuration file using the option --config\n" + sys.exit(1) + +S = Scoreboard(args.config[0],verbose=False,build_dir=args.build_dir[0]) + +region = "global" +scalar = "RMSE" +sname = "%s %s" % (scalar,region) +group = "MeanState" +table = {} +unit = {} +for c in S.list(): + for subdir, dirs, files in os.walk(c.output_path): + for fname in files: + if not fname.endswith(".nc"): continue + with Dataset(os.path.join(c.output_path,fname)) as dset: + if group not in dset.groups .keys(): continue + if "scalars" not in dset.groups[group].groups.keys(): continue + grp = dset.groups[group]["scalars"] + if sname not in grp.variables.keys(): continue + var = grp.variables[sname] + if not table.has_key(c.longname): + table[c.longname] = {} + unit [c.longname] = var.units + table[c.longname][dset.name] = var[...] + +# What models have data? +models = [] +for key in table.keys(): + for m in table[key].keys(): + if m not in models: models.append(m) +models.sort() + +# render a table of values in csv format +lines = ",".join(["Name","Units"] + models) +for c in S.list(): + if not table.has_key(c.longname): continue + line = "%s,%s" % (c.longname,unit[c.longname]) + for m in models: + if table[c.longname].has_key(m): + line += ",%g" % (table[c.longname][m]) + else: + line += "," + lines += "\n%s" % line + +with file(args.csv[0],mode="w") as f: + f.write(lines) + diff --git a/demo/ilamb.cfg b/demo/ilamb.cfg index 83692eb2..793227fd 100644 --- a/demo/ilamb.cfg +++ b/demo/ilamb.cfg @@ -284,6 +284,7 @@ skip_iav = True [h2: Terrestrial Water Storage Anomaly] variable = "twsa" alternate_vars = "tws" +derived = "pr-evspsbl-mrro" cmap = "Blues" weight = 5 ctype = "ConfTWSA" @@ -292,6 +293,31 @@ ctype = "ConfTWSA" source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" weight = 25 +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Snow Water Equivalent] +variable = "swe" +alternate_vars = "snw" +cmap = "Blues" +weight = 5 + +[CanSISE] +source = "DATA/swe/CanSISE/swe.nc" +weight = 25 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Permafrost] +variable = "tsl" + +[NSIDC] +ctype = "ConfPermafrost" +source = "DATA/permafrost/NSIDC/NSIDC_0.5x0.5.nc" +y0 = 1970. +yf = 2000. 
+Teps = 273.15 +dmax = 3.5 + ########################################################################### [h1: Radiation and Energy Cycle] diff --git a/doc/install.rst b/doc/install.rst index 7ccfaaf9..84a85043 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -25,7 +25,7 @@ include: * netCDF4_, a python/numpy interface to the netCDF C library (you must have the C library installed) * sympy_, a python library for symbolic mathematics * mpi4py_, a python wrapper around the MPI library (you must have a MPI implementation installed) -* cfunits_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) +* cf_units_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) I have designated that a few of these dependencies are python interfaces to C libraries and so the library must also be installed @@ -271,7 +271,7 @@ Next open the local copy of the file with a editor and search for .. _numpy: https://www.numpy.org/ .. _matplotlib: https://matplotlib.org/ .. _netCDF4: https://github.com/Unidata/netcdf4-python -.. _cfunits: https://bitbucket.org/cfpython/cfunits-python/ +.. _cf_units: https://github.com/SciTools/cf-units .. _basemap: https://github.com/matplotlib/basemap .. _sympy: https://www.sympy.org/ .. _mpi4py: https://pythonhosted.org/mpi4py/ diff --git a/setup.py b/setup.py index e36e7ff2..b189267f 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ import subprocess import os -VERSION = '2.2' +VERSION = '2.3' def git_version(): """ @@ -97,12 +97,12 @@ def write_version_py(filename=os.path.join('src/ILAMB', 'generated_version.py')) keywords=['benchmarking','earth system modeling','climate modeling','model intercomparison'], packages=['ILAMB'], package_dir={'ILAMB' : 'src/ILAMB'}, - scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor'], - install_requires=['numpy>=1.9.2', + scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor','bin/ilamb-table'], + install_requires=['numpy>=1.11.0', 'matplotlib>=1.4.3', #'basemap>=1.0.7', # basemap is in pypi but broken, need to manually install - 'netCDF4>=1.1.4,<=1.2.4', # upper limit is for cfunits - 'cfunits>=1.1.4', + 'netCDF4>=1.1.4', + 'cf_units>=2.0.0', 'sympy>=0.7.6', 'mpi4py>=1.3.1', 'scipy>=0.9.0'] diff --git a/src/ILAMB/ConfPermafrost.py b/src/ILAMB/ConfPermafrost.py new file mode 100644 index 00000000..92c2ead4 --- /dev/null +++ b/src/ILAMB/ConfPermafrost.py @@ -0,0 +1,223 @@ +from Confrontation import Confrontation +from mpl_toolkits.basemap import Basemap +from Variable import Variable +from Post import ColorBar +import matplotlib.pyplot as plt +from netCDF4 import Dataset +import ilamblib as il +import numpy as np + +class ConfPermafrost(Confrontation): + + def __init__(self,**keywords): + + # Ugly, but this is how we call the Confrontation constructor + super(ConfPermafrost,self).__init__(**keywords) + + # Now we overwrite some things which are different here + self.layout + self.regions = ["global"] + self.layout.regions = self.regions + self.weight = { "Obs Score" : 1., + "Mod Score" : 1. 
} + for page in self.layout.pages: + page.setMetricPriority(["Total Area" , + "Overlap Area", + "Missed Area" , + "Excess Area" , + "Obs Score" , + "Mod Score" , + "Overall Score"]) + + def stageData(self,m): + + obs = Variable(filename = self.source, + variable_name = "permafrost_extent") + + # These parameters may be changed from the configure file + y0 = float(self.keywords.get("y0" ,1970.)) # [yr] beginning year to include in analysis + yf = float(self.keywords.get("yf" ,2000.)) # [yr] end year to include in analysis + dmax = float(self.keywords.get("dmax",3.5)) # [m] consider layers where depth in is the range [0,dmax] + Teps = float(self.keywords.get("Teps",273.15)) # [K] temperature below which we assume permafrost occurs + + t0 = (y0 -1850.)*365. + tf = (yf+1-1850.)*365. + mod = m.extractTimeSeries(self.variable, + initial_time = t0, + final_time = tf) + mod.trim(t = [t0 ,tf ], + lat = [obs.lat.min(),90 ], + d = [0 ,dmax]) + mod = mod.annualCycle() + Tmax = mod.data.max(axis=0) + table = np.zeros(Tmax.shape[-2:]) + table[...] = np.NAN + thaw = np.zeros(table.shape,dtype=bool) + for i in range(mod.depth_bnds.shape[0]-1,-1,-1): + thaw += (Tmax[i]>=Teps) + frozen = np.where((Tmax[i] 60: fsize = 10 - ax.set_ylabel(ylabel,fontsize=fsize) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() - - # add the figure to the HTML layout - if name == "Benchmark" and region == "global": - short_name = short_name.replace("global_","") - page.addFigure(c.longname, - "benchmark_" + short_name, - "Benchmark_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - - # determine the 1D relationship curves - bins = np.linspace(ind_min,ind_max,nbin+1) - delta = 0.1*(bins[1]-bins[0]) - inds = np.digitize(x,bins) - ids = np.unique(inds).clip(1,bins.size-1) - xb = [] - yb = [] - eb = [] - for i in ids: - yt = y[inds==i] - xi = 0.5 - xb.append(xi*bins[i-1]+(1.-xi)*bins[i]) - yb.append(yt.mean()) - try: - eb.append(yt.std()) # for some reason this fails sometimes - except: - eb.append(np.sqrt(((yt-yb[-1])**2).sum()/float(yt.size))) - - if name == "Benchmark": - obs_x = np.asarray(xb) - obs_y = np.asarray(yb) - obs_e = np.asarray(eb) - else: - mod_x = np.asarray(xb) - mod_y = np.asarray(yb) - mod_e = np.asarray(eb) - - # compute and plot the difference - O = np.array(obs_dist.data) - M = np.array(mod_dist.data) - O[np.where(obs_dist.mask)] = 0. - M[np.where(mod_dist.mask)] = 0. 
- dif_dist = np.ma.masked_array(M-O,mask=obs_dist.mask*mod_dist.mask) - lim = np.abs(dif_dist).max() - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - pc = ax.pcolormesh(xedges, - yedges, - dif_dist, - cmap = "Spectral_r", - vmin = -lim, - vmax = +lim) - div = make_axes_locatable(ax) - fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), - orientation="vertical", - label="Distribution Difference") - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_diff_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: + + # Grab/create a relationship and scalars group + group = None + if "Relationships" not in results.groups: + group = results.createGroup("Relationships") + else: + group = results.groups["Relationships"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each relationship... + for c in self.relationships: + + # try to get the independent data from the model and obs + try: + ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + ind_name = c.longname.split("/")[0] + ind_min = c.limits["timeint"]["min"]-1e-12 + ind_max = c.limits["timeint"]["max"]+1e-12 + except: + continue + + # Add figures to the html page page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "benchmark_rel_%s" % ind_name, + "Benchmark_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) - - # score the distributions = 1 - Hellinger distance - score = 1.-np.sqrt(((np.sqrt(obs_dist)-np.sqrt(mod_dist))**2).sum())/np.sqrt(2) - vname = '%s Score %s' % (c.longname.split('/')[0],region) - #if vname in scalars.variables: - # scalars.variables[vname][0] = score - #else: - # Variable(name = vname, - # unit = "1", - # data = score).toNetCDF4(results,group="Relationships") - - # plot the 1D curve - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - ax.errorbar(obs_x-delta,obs_y,yerr=obs_e,fmt='-o',color='k') - ax.errorbar(mod_x+delta,mod_y,yerr=mod_e,fmt='-o',color=m.color) - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_func_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "rel_%s" % ind_name, + "MNAME_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) + page.addFigure(c.longname, + "rel_diff_%s" % ind_name, + "MNAME_RNAME_rel_diff_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_func_%s" % ind_name, + "MNAME_RNAME_rel_func_%s.png" % ind_name, + legend = False, + benchmark = False) + + # Analysis over regions + lim_dep = [dep_min,dep_max] + lim_ind = [ind_min,ind_max] + longname = 
c.longname.split('/')[0] + for region in self.regions: + ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + + # Make the plots + _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), + "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) + _plotDistribution(com_dist[0],com_dist[1],com_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) + _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) + _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], + "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), + m.color, + os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) + + # Score the distribution + score = _scoreDistribution(ref_dist[0],com_dist[0]) + sname = "%s Hellinger Distance %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + # Score the functional response + score = _scoreFunction(ref_dist[3],com_dist[3]) + sname = "%s RMSE Score %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + - # score the relationship - i0,i1 = np.where(np.abs(obs_x[:,np.newaxis]-mod_x)<1e-12) - obs_y = obs_y[i0]; mod_y = mod_y[i1] - isnan = np.isnan(obs_y)*np.isnan(mod_y) - obs_y[isnan] = 0.; mod_y[isnan] = 0. 
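+                # Two scalars are recorded per relationship and region above: a
+                # Hellinger-distance measure of the 2D distributions and an
+                # RMSE-based score of the binned functional response.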
- score = np.exp(-np.linalg.norm(obs_y-mod_y)/np.linalg.norm(obs_y)) - vname = '%s RMSE Score %s' % (c.longname.split('/')[0],region) - if vname in scalars.variables: - scalars.variables[vname][0] = score - else: - Variable(name = vname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - results.close() + class FileContextManager(): diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py index eff17ebf..6980d8b8 100644 --- a/src/ILAMB/ModelResult.py +++ b/src/ILAMB/ModelResult.py @@ -266,7 +266,6 @@ def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_ti """ from sympy import sympify - from cfunits import Units if expression is None: raise il.VarNotInModel() args = {} units = {} diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py index 502f9f12..809b154d 100644 --- a/src/ILAMB/Post.py +++ b/src/ILAMB/Post.py @@ -223,11 +223,19 @@ def __init__(self,name,title): def __str__(self): r = Regions() - def _sortFigures(figure,priority=["benchmark_timeint","timeint","timeintremap","bias","rmse","benchmark_phase","phase","shift","biasscore","rmsescore","shiftscore","spatial_variance","legend_spatial_variance","spaceint","accumulate","cycle","dtcycle","compcycle","temporal_variance"]): + def _sortFigures(figure): + macro = ["timeint","bias","rmse","iav","phase","shift","variance","spaceint","accumulate","cycle"] val = 1. - for i,pname in enumerate(priority): - if pname == figure.name: val += 2**i - return val + for i,m in enumerate(macro): + if m in figure.name: val += 3**i + if figure.name.startswith("benchmark"): val -= 1. + if figure.name.endswith("score"): val += 1. + if figure.name.startswith("legend"): + if "variance" in figure.name: + val += 1. + else: + val = 0. + return val code = """
    @@ -834,11 +842,14 @@ def head(self): class HtmlLayout(): def __init__(self,pages,cname,years=None): - + self.pages = pages self.cname = cname.replace("/"," / ") if years is not None: - self.cname += " / %d-%d" % (years) + try: + self.cname += " / %d-%d" % (years) + except: + pass for page in self.pages: page.pages = self.pages page.cname = self.cname @@ -1048,7 +1059,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa nvariables = len(variables) maxV = max([len(v) for v in variables]) maxM = max([len(m) for m in models]) - wpchar = 0.1 + wpchar = 0.15 wpcell = 0.19 hpcell = 0.25 w = maxV*wpchar + max(4,nmodels)*wpcell @@ -1085,6 +1096,8 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[0].set_yticklabels(variables[::-1]) ax[0].tick_params('both',length=0,width=0,which='major') ax[0].tick_params(axis='y',pad=10) + ax[0].set_xlim(0,nmodels) + ax[0].set_ylim(0,nvariables) if vcolor is not None: for i,t in enumerate(ax[0].yaxis.get_ticklabels()): t.set_backgroundcolor(vcolor[::-1][i]) @@ -1117,6 +1130,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[i].set_xticklabels(models,rotation=90) ax[i].tick_params('both',length=0,width=0,which='major') ax[i].set_yticks([]) + ax[i].set_xlim(0,nmodels) ax[i].set_ylim(0,nvariables) if rel_only: ax[i].set_yticks (np.arange(nvariables)+0.5) diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py index ba0b2cfb..5fceb929 100644 --- a/src/ILAMB/Scoreboard.py +++ b/src/ILAMB/Scoreboard.py @@ -5,6 +5,7 @@ from ConfEvapFraction import ConfEvapFraction from ConfIOMB import ConfIOMB from ConfDiurnal import ConfDiurnal +from ConfPermafrost import ConfPermafrost import os,re from netCDF4 import Dataset import numpy as np @@ -179,17 +180,19 @@ def ParseScoreboardConfigureFile(filename): "ConfRunoff" : ConfRunoff, "ConfEvapFraction": ConfEvapFraction, "ConfIOMB" : ConfIOMB, - "ConfDiurnal" : ConfDiurnal} + "ConfDiurnal" : ConfDiurnal, + "ConfPermafrost" : ConfPermafrost} class Scoreboard(): """ A class for managing confrontations """ - def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None): + def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None,rel_only=False): if not os.environ.has_key('ILAMB_ROOT'): raise ValueError("You must set the environment variable 'ILAMB_ROOT'") self.build_dir = build_dir + self.rel_only = rel_only if (master and not os.path.isdir(self.build_dir)): os.mkdir(self.build_dir) @@ -272,7 +275,7 @@ def createHtml(self,M,filename="index.html"): has_rel = np.asarray([len(rel.children) for rel in rel_tree.children]).sum() > 0 nav = "" if has_rel: - GenerateRelSummaryFigure(self,M,"%s/overview_rel.png" % self.build_dir) + GenerateRelSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir,rel_only=self.rel_only) nav = """
  • Relationship
  • """ #global global_print_node_string @@ -453,7 +456,7 @@ def createBarCharts(self,M): html = GenerateBarCharts(self.tree,M) def createSummaryFigure(self,M): - GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir) + GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir,rel_only=self.rel_only) def dumpScores(self,M,filename): out = file("%s/%s" % (self.build_dir,filename),"w") @@ -582,7 +585,7 @@ def GenerateTable(tree,M,S,composite=True): BuildHTMLTable(tree,M,S.build_dir) return global_html -def GenerateSummaryFigure(tree,M,filename): +def GenerateSummaryFigure(tree,M,filename,rel_only=False): models = [m.name for m in M] variables = [] @@ -602,96 +605,34 @@ def GenerateSummaryFigure(tree,M,filename): else: data[row,:] = var.score - BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors) - -def GenerateRelSummaryFigure(S,M,figname): - - def _parse(node): - global score,count,rows - if node.level != 5: return - row = "%s vs. %s" % (node.parent.parent.parent.name,node.parent.name) - col = node.name - if row not in rows: rows.append(row) - if not score .has_key(row): score[row] = {} - if not count .has_key(row): count[row] = {} - if not score[row].has_key(col): score[row][col] = 0. - if not count[row].has_key(col): count[row][col] = 0. - score[row][col] += node.score - count[row][col] += 1. - - class rnode(): - def __init__(self,name,level): - self.name = name - self.level = level - self.parent = None - self.score = None - self.children = [] - - - root = S.build_dir - tree = rnode("root",0) - previous_node = tree - current_level = 0 - - for subdir, dirs, files in os.walk(root): - if subdir == root: continue - flat = subdir.replace(root,"").lstrip("/").split("/") - level = len(flat) - name = flat[-1] - child = rnode(name,level) - if level == current_level: - child.parent = previous_node.parent - previous_node.parent.children.append(child) - if level == 3: - for fname in [f for f in files if f.endswith(".nc") and "Benchmark" not in f]: - with Dataset(os.path.join(subdir,fname)) as dset: - if "Relationships" not in dset.groups: continue - grp = dset.groups["Relationships"]["scalars"] - model = dset.name - for var in [var for var in grp.variables.keys() if ("Overall" not in var and - "global" in var)]: - rname = var.split(" ")[0] - hadrel = False - for c in child.children: - if c.name == rname: - rel = c - hadrel = True - if not hadrel: rel = rnode(rname,level+1) - mod = rnode(model,level+2) - mod.score = grp.variables[var][...] 
- mod.parent = rel - rel.children.append(mod) - rel.parent = child - if not hadrel: child.children.append(rel) - elif level > current_level: - child.parent = previous_node - previous_node.children.append(child) - current_level = level - else: - addto = tree - for i in range(level-1): addto = addto.children[-1] - child.parent = addto - addto.children.append(child) - current_level = level - previous_node = child - - global score,count,rows - score = {} - count = {} - rows = [] - TraversePreorder(tree,_parse) - models = [] - for row in rows: - for key in score[row].keys(): - if key not in models: models.append(key) - data = np.zeros((len(rows),len(models))) + BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors,rel_only=rel_only) + +def GenerateRelSummaryFigure(S,M,figname,rel_only=False): + + # reorganize the relationship data + scores = {} + counts = {} + rows = [] + vcolors = [] + for h1 in S.children: + for dep in h1.children: + dname = dep.name.split("/")[0] + for ind in dep.children: + iname = ind.name.split("/")[0] + key = "%s/%s" % (dname,iname) + if scores.has_key(key): + scores[key] += ind.score + counts[key] += 1. + else: + scores[key] = np.copy(ind.score) + counts[key] = 1. + rows .append(key) + vcolors.append(h1.bgcolor) + if len(rows) == 0: return + data = np.ma.zeros((len(rows),len(M))) for i,row in enumerate(rows): - for j,col in enumerate(models): - try: - data[i,j] = score[row][col] / count[row][col] - except: - data[i,j] = np.nan - BenchmarkSummaryFigure(models,rows,data,figname,rel_only=False) + data[i,:] = scores[row] / counts[row] + BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) def GenerateRelationshipTree(S,M): @@ -753,7 +694,6 @@ def GenerateRelationshipTree(S,M): if "Overall Score global" not in grp.variables.keys(): continue h2.score[i] = grp.variables["Overall Score global"][...] - return rel_tree diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py index 7d1f48b8..fcfa33f9 100644 --- a/src/ILAMB/Variable.py +++ b/src/ILAMB/Variable.py @@ -3,7 +3,7 @@ from mpl_toolkits.basemap import Basemap import matplotlib.colors as colors from pylab import get_cmap -from cfunits import Units +from cf_units import Unit import ilamblib as il import Post as post import numpy as np @@ -220,6 +220,17 @@ def __str__(self): return s + def nbytes(self): + r"""Estimate the memory usage of a variable in bytes. + """ + nbytes = 0. + for key in self.__dict__.keys(): + try: + nbytes += self.__dict__[key].nbytes + except: + pass + return nbytes + def integrateInTime(self,**keywords): r"""Integrates the variable over a given time period. 
@@ -286,7 +297,7 @@ def integrateInTime(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_time" if mean: @@ -300,18 +311,18 @@ def integrateInTime(self,**keywords): else: dt = dt.sum(axis=0) np.seterr(over='ignore',under='ignore') - integral /= dt + integral = integral / dt np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("d")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("d")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, lat = self.lat, lat_bnds = self.lat_bnds, @@ -403,7 +414,7 @@ def integrateInDepth(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_depth" if mean: @@ -417,18 +428,18 @@ def integrateInDepth(self,**keywords): else: dz = dz.sum(axis=axis) np.seterr(over='ignore',under='ignore') - integral /= dz + integral = integral / dz np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, time = self.time, time_bnds = self.time_bnds, @@ -521,13 +532,13 @@ def _integrate(var,areas): integral = _integrate(self.data,measure) if mean: np.seterr(under='ignore') - integral /= measure.sum() + integral = integral / measure.sum() np.seterr(under='raise') # handle the name and unit name = self.name + "_integrated_over_space" if region is not None: name = name.replace("space",region) - unit = Units(self.unit) + unit = Unit(self.unit) if mean: # we have already divided thru by the non-masked area in @@ -536,12 +547,12 @@ def _integrate(var,areas): else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m2")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m2")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = np.ma.masked_array(integral), - unit = unit.units, + unit = "%s" % unit, time = self.time, time_bnds = self.time_bnds, depth = self.depth, @@ -710,7 +721,7 @@ def _make_bnds(x): bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) return bnds - assert Units(var.unit) == Units(self.unit) + assert Unit(var.unit) == Unit(self.unit) assert self.temporal == False assert self.ndata == var.ndata assert self.layered == False @@ -752,7 +763,7 @@ def _make_bnds(x): def convert(self,unit,density=998.2): """Convert the variable to a given unit. - We use the UDUNITS library via the cfunits python interface to + We use the UDUNITS library via the cf_units python interface to convert the variable's unit. Additional support is provided for unit conversions in which substance information is required. 
For example, in quantities such as precipitation it @@ -777,53 +788,53 @@ def convert(self,unit,density=998.2): this object with its unit converted """ - src_unit = Units(self.unit) - tar_unit = Units( unit) + if unit is None: return self + src_unit = Unit(self.unit) + tar_unit = Unit( unit) mask = self.data.mask # Define some generic quantities - linear = Units("m") - linear_rate = Units("m s-1") - area_density = Units("kg m-2") - area_density_rate = Units("kg m-2 s-1") - mass_density = Units("kg m-3") - volume_conc = Units("mol m-3") - mass_conc = Units("mol kg-1") - - # cfunits doesn't handle frequently found temperature expressions + linear = Unit("m") + linear_rate = Unit("m s-1") + area_density = Unit("kg m-2") + area_density_rate = Unit("kg m-2 s-1") + mass_density = Unit("kg m-3") + volume_conc = Unit("mol m-3") + mass_conc = Unit("mol kg-1") + + # UDUNITS doesn't handle frequently found temperature expressions synonyms = {"K":"degK", "R":"degR", "C":"degC", "F":"degF"} for syn in synonyms.keys(): - if src_unit.units == syn: src_unit = Units(synonyms[syn]) - if tar_unit.units == syn: tar_unit = Units(synonyms[syn]) + if src_unit.format() == syn: src_unit = Unit(synonyms[syn]) + if tar_unit.format() == syn: tar_unit = Unit(synonyms[syn]) # Do we need to multiply by density? - if ( (src_unit.equivalent(linear_rate) and tar_unit.equivalent(area_density_rate)) or - (src_unit.equivalent(linear ) and tar_unit.equivalent(area_density )) or - (src_unit.equivalent(mass_conc ) and tar_unit.equivalent(volume_conc )) ): + if ( (src_unit.is_convertible(linear_rate) and tar_unit.is_convertible(area_density_rate)) or + (src_unit.is_convertible(linear ) and tar_unit.is_convertible(area_density )) or + (src_unit.is_convertible(mass_conc ) and tar_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') self.data *= density np.seterr(over='raise',under='raise') src_unit *= mass_density # Do we need to divide by density? 
- if ( (tar_unit.equivalent(linear_rate) and src_unit.equivalent(area_density_rate)) or - (tar_unit.equivalent(linear ) and src_unit.equivalent(area_density )) or - (tar_unit.equivalent(mass_conc ) and src_unit.equivalent(volume_conc )) ): + if ( (tar_unit.is_convertible(linear_rate) and src_unit.is_convertible(area_density_rate)) or + (tar_unit.is_convertible(linear ) and src_unit.is_convertible(area_density )) or + (tar_unit.is_convertible(mass_conc ) and src_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') - self.data /= density + self.data = self.data / density np.seterr(over='raise',under='raise') - src_unit /= mass_density + src_unit = src_unit / mass_density # Convert units try: - self.data = Units.conform(self.data,src_unit,tar_unit) + self.data = src_unit.convert(self.data,tar_unit) self.data = np.ma.masked_array(self.data,mask=mask) self.unit = unit except: - print "var_name = %s, src_unit = %s, target_unit = %s " % (self.name,src_unit,tar_unit) raise il.UnitConversionError() return self @@ -1599,7 +1610,7 @@ def spatialDistribution(self,var,region="global"): R0 = 1.0 std0 = std0.clip(1e-12) std = std .clip(1e-12) - std /= std0 + std = std/std0 score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) except: std = np.asarray([0.0]) diff --git a/src/ILAMB/__init__.py b/src/ILAMB/__init__.py index f8ea82b9..3bb5124a 100644 --- a/src/ILAMB/__init__.py +++ b/src/ILAMB/__init__.py @@ -1,6 +1,6 @@ __author__ = 'Nathan Collier' -__date__ = 'Nov 2017' -__version__ = '2.2' +__date__ = 'Jun 2018' +__version__ = '2.3' from distutils.version import LooseVersion import platform @@ -10,7 +10,7 @@ "numpy" : "1.9.2", "matplotlib" : "1.4.3", "netCDF4" : "1.1.4", - "cfunits" : "1.1.4", + "cf_units" : "2.0.0", "mpl_toolkits.basemap" : "1.0.7", "sympy" : "0.7.6", "mpi4py" : "1.3.1" diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py index 714209d8..8d77b8f6 100644 --- a/src/ILAMB/constants.py +++ b/src/ILAMB/constants.py @@ -130,6 +130,28 @@ "sidelbl" :"RMSE SCORE", "haslegend" :True } +space_opts["iav"] = { "name" :"Interannual variability", + "cmap" :"Reds", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iav.png", + "sidelbl" :"MODEL INTERANNUAL VARIABILITY", + "haslegend" :True } + +space_opts["iavscore"] = { "name" :"Interannual variability score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iavscore.png", + "sidelbl" :"INTERANNUAL VARIABILITY SCORE", + "haslegend" :True } + space_opts["shift"] = { "name" :"Temporally integrated mean phase shift", "cmap" :"PRGn", "sym" :True, diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py index 1304be6a..b7cff096 100644 --- a/src/ILAMB/ilamblib.py +++ b/src/ILAMB/ilamblib.py @@ -3,11 +3,11 @@ from Regions import Regions from netCDF4 import Dataset,num2date,date2num from datetime import datetime -from cfunits import Units +from cf_units import Unit from copy import deepcopy from mpi4py import MPI import numpy as np -import logging +import logging,re logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) @@ -53,7 +53,30 @@ def __str__(self): return "NotLayeredVariable" class NotDatasiteVariable(Exception): def __str__(self): return "NotDatasiteVariable" +def FixDumbUnits(unit): + r"""Try to fix the dumb units people insist on using. 
+ Parameters + ---------- + unit : str + the trial unit + + Returns + ------- + unit : str + the fixed unit + """ + # Various synonyms for 1 + if unit.lower().strip() in ["unitless", + "n/a", + "none"]: unit = "1" + # Remove the C which so often is used to mean carbon but actually means coulomb + tokens = re.findall(r"[\w']+", unit) + for token in tokens: + if token.endswith("C") and Unit(token[:-1]).is_convertible(Unit("g")): + unit = unit.replace(token,token[:-1]) + return unit + def GenerateDistinctColors(N,saturation=0.67,value=0.67): r"""Generates a series of distinct colors. @@ -86,7 +109,7 @@ def ConvertCalendar(t,tbnd=None): This routine converts the representation of time to the ILAMB default: days since 1850-1-1 00:00:00 on a 365-day calendar. This is so we can make comparisons with data from other models and - benchmarks. We use cfunits time conversion capability. + benchmarks. Parameters ---------- @@ -343,71 +366,51 @@ def SympifyWithArgsUnits(expression,args,units): """ from sympy import sympify,postorder_traversal - # The traversal needs that we make units commensurate when - # possible - keys = args.keys() - for i in range(len(keys)): - ikey = keys[i] - for j in range(i+1,len(keys)): - jkey = keys[j] - if Units(units[jkey]).equivalent(Units(units[ikey])): - args [jkey] = Units.conform(args[jkey], - Units(units[jkey]), - Units(units[ikey]), - inplace=True) - units[jkey] = units[ikey] - - # We need to do what sympify does but also with unit - # conversions. So we traverse the expression tree in post order - # and take actions based on the kind of operation being performed. expression = sympify(expression) + + # try to convert all arguments to same units if possible, it + # catches most use cases + keys = args.keys() + for i,key0 in enumerate(keys): + for key in keys[(i+1):]: + try: + Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True) + units[key] = units[key0] + except: + pass + for expr in postorder_traversal(expression): - - if expr.is_Atom: continue - ekey = str(expr) # expression key - + ekey = str(expr) if expr.is_Add: - # Addition will require that all args should be the same - # unit. As a convention, we will try to conform all units - # to the first variable's units. - key0 = None - for arg in expr.args: - key = str(arg) - if not args.has_key(key): continue - if key0 is None: - key0 = key - else: - # Conform these units to the units of the first arg - Units.conform(args[key], - Units(units[key]), - Units(units[key0]), - inplace=True) - units[key] = units[key0] - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = units[key0] + # if there are scalars in the expression, these will not + # be in the units dictionary. Add them and give them an + # implicit unit of 1 + keys = [str(arg) for arg in expr.args] + for key in keys: + if not units.has_key(key): units[key] = "1" - elif expr.is_Pow: + # if we are adding, all arguments must have the same unit. 
+ key0 = keys[0] + for key in keys: + Unit(units[key]).convert(np.ones(1),Unit(units[key0])) + units[key] = units[key0] + units[ekey] = "%s" % (units[key0]) - assert len(expr.args) == 2 # check on an assumption - power = float(expr.args[1]) - args [ekey] = args[str(expr.args[0])]**power - units[ekey] = Units(units[str(expr.args[0])]) - units[ekey] = units[ekey]**power - - elif expr.is_Mul: + elif expr.is_Pow: - unit = Units("1") - for arg in expr.args: - key = str(arg) - if units.has_key(key): unit *= Units(units[key]) - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = Units(unit).formatted() + # if raising to a power, just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = "(%s)%s" % (units[keys[0]],keys[1]) - return args[ekey],units[ekey] + elif expr.is_Mul: + + # just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = " ".join(["(%s)" % units[key] for key in keys if units.has_key(key)]) + return sympify(str(expression),locals=args),units[ekey] + def ComputeIndexingArrays(lat2d,lon2d,lat,lon): """Blah. @@ -635,11 +638,11 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if depth_bnd_name is not None: depth_bnd = grp.variables[depth_bnd_name][...] if dunit is not None: - if not Units(dunit).equivalent(Units("m")): + if not Unit(dunit).is_convertible(Unit("m")): raise ValueError("Non-linear units [%s] of the layered dimension [%s] in %s" % (dunit,depth_name,filename)) - depth = Units.conform(depth,Units(dunit),Units("m"),inplace=True) + depth = Unit(dunit).convert(depth,Unit("m"),inplace=True) if depth_bnd is not None: - depth_bnd = Units.conform(depth_bnd,Units(dunit),Units("m"),inplace=True) + depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) if data_name is not None: data = len(grp.dimensions[data_name]) @@ -701,16 +704,15 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12) v = np.ma.masked_array(v,mask=mask,copy=False) - # handle units problems that cfunits doesn't if "units" in var.ncattrs(): - units = var.units.replace("unitless","1") + units = FixDumbUnits(var.units) else: units = "1" dset.close() return v,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data -def Score(var,normalizer,FC=0.999999): +def Score(var,normalizer): """Remaps a normalized variable to the interval [0,1]. Parameters @@ -726,16 +728,7 @@ def Score(var,normalizer,FC=0.999999): name = name.replace("rmse","rmse_score") name = name.replace("iav" ,"iav_score") np.seterr(over='ignore',under='ignore') - - data = None - if "bias" in var.name or "diff" in var.name: - deno = np.ma.copy(normalizer.data) - if (deno.size - deno.mask.sum()) > 1: deno -= deno.min()*FC - data = np.exp(-np.abs(var.data/deno)) - elif "rmse" in var.name: - data = np.exp(-var.data/normalizer.data) - elif "iav" in var.name: - data = np.exp(-np.abs(var.data/normalizer.data)) + data = np.exp(-np.abs(var.data/normalizer.data)) data[data<1e-16] = 0. np.seterr(over='raise',under='raise') return Variable(name = name, @@ -810,7 +803,7 @@ def _composeGrids(v1,v2): lon = lon_bnds.mean(axis=1) return lat,lon,lat_bnds,lon_bnds -def AnalysisMeanState(ref,com,**keywords): +def AnalysisMeanStateSites(ref,com,**keywords): """Perform a mean state analysis. 
This mean state analysis examines the model mean state in space @@ -848,6 +841,7 @@ def AnalysisMeanState(ref,com,**keywords): the unit to use when displaying output on plots on the HTML page """ + from Variable import Variable regions = keywords.get("regions" ,["global"]) dataset = keywords.get("dataset" ,None) @@ -860,14 +854,14 @@ def AnalysisMeanState(ref,com,**keywords): skip_iav = keywords.get("skip_iav" ,False) skip_cycle = keywords.get("skip_cycle" ,False) ILAMBregions = Regions() - spatial = ref.spatial + spatial = False normalizer = None # Only study the annual cycle if it makes sense if not ref.monthly: skip_cycle = True if ref.time.size < 12: skip_cycle = True - - # We find + if skip_rmse : skip_iav = True + if spatial: lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) @@ -926,14 +920,58 @@ def AnalysisMeanState(ref,com,**keywords): # Compute the bias, RMSE, and RMS maps using the interpolated # quantities bias = REF_timeint.bias(COM_timeint) - bias_score_map = Score(bias,REF_timeint) + cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata) + crms = cREF.rms () + bias_score_map = Score(bias,crms) if spatial: bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask if not skip_rmse: - rmse = REF.rmse(COM) - rms = REF.rms () - rmse_score_map = Score(rmse,rms) - + cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata) + rmse = REF.rmse( COM) + crmse = cREF.rmse(cCOM) + rmse_score_map = Score(crmse,crms) + if not skip_iav: + ref_iav = Variable(name = "centralized %s" % ref.name, unit = ref.unit, + data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask), + time = ref.time, time_bnds = ref.time_bnds, + lat = ref.lat , lat_bnds = ref.lat_bnds, + lon = ref.lon , lon_bnds = ref.lon_bnds, + area = ref.area, ndata = ref.ndata).rms() + com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit, + data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask), + time = com.time, time_bnds = com.time_bnds, + lat = com.lat , lat_bnds = com.lat_bnds, + lon = com.lon , lon_bnds = com.lon_bnds, + area = com.area, ndata = com.ndata).rms() + REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata).rms() + COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata).rms() + iav_score_map = Score(Variable(name = "diff %s" % REF.name, 
unit = REF.unit, + data = (COM_iav.data-REF_iav.data), + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata), + REF_iav) + # The phase shift comes from the interpolated quantities if not skip_cycle: ref_cycle = REF.annualCycle() @@ -948,7 +986,7 @@ def AnalysisMeanState(ref,com,**keywords): ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {} com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {} bias_val = {}; bias_score = {}; rmse_val = {}; rmse_score = {} - space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {} + space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {} ref_union_mean = {}; ref_comp_mean = {} com_union_mean = {}; com_comp_mean = {} for region in regions: @@ -975,6 +1013,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .integrateInSpace(region=region,mean=True) rmse_score [region] = rmse_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) space_std[region],space_cor[region],sd_score[region] = REF_timeint.spatialDistribution(COM_timeint,region=region) else: ref_period_mean[region] = ref_timeint .siteStats(region=region) @@ -995,6 +1035,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .siteStats(region=region) rmse_score [region] = rmse_score_map .siteStats(region=region,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer) ref_period_mean[region].name = "Period Mean (original grids) %s" % (region) ref_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) @@ -1005,6 +1047,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region].name = "RMSE %s" % (region) rmse_score [region].name = "RMSE Score %s" % (region) + if not skip_iav: + iav_score [region].name = "Interannual Variability Score %s" % (region) if not skip_cycle: ref_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) ref_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) @@ -1033,6 +1077,7 @@ def _convert(var,unit): plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val] if not skip_rmse: plot_vars += [rmse,rmse_val] if not skip_cycle: plot_vars += [com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] + if not skip_iav: plot_vars += [com_iav] for var in plot_vars: _convert(var,plot_unit) # Rename and optionally dump out information to netCDF4 files @@ -1064,13 +1109,17 @@ def _convert(var,unit): out_vars.append(shift_score_map) if not skip_rmse: rmse .name = "rmse_map_of_%s" % ref.name - rms .name = "rms_map_of_%s" % ref.name rmse_score_map.name = "rmsescore_map_of_%s" % ref.name out_vars.append(rmse) - out_vars.append(rms ) out_vars.append(rmse_score_map) out_vars.append(rmse_val) out_vars.append(rmse_score) + if not skip_iav: + com_iav.name = "iav_map_of_%s" % ref.name + iav_score_map.name = "iavscore_map_of_%s" % ref.name + out_vars.append(com_iav) + out_vars.append(iav_score_map) + out_vars.append(iav_score) if dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1089,6 +1138,9 @@ def _convert(var,unit): if not skip_cycle: ref_maxt_map.name = "phase_map_of_%s" % ref.name out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle] + if not skip_iav: + 
ref_iav.name = "iav_map_of_%s" % ref.name + out_vars.append(ref_iav) if benchmark_dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1097,124 +1149,303 @@ def _convert(var,unit): var.toNetCDF4(benchmark_dataset,group="MeanState") return - -def AnalysisRelationship(dep_var,ind_var,dataset,rname,**keywords): - """Perform a relationship analysis. - - Expand to provide details of what exactly is done. + +def AnalysisMeanStateSpace(ref,com,**keywords): + """Perform a mean state analysis. + + This mean state analysis examines the model mean state in space + and time. We compute the mean variable value over the time period + at each spatial cell or data site as appropriate, as well as the + bias and RMSE relative to the observational variable. We will + output maps of the period mean values and bias. For each spatial + cell or data site we also estimate the phase of the variable by + finding the mean time of year when the maximum occurs and the + phase shift by computing the difference in phase with respect to + the observational variable. In the spatial dimension, we compute a + spatial mean for each of the desired regions and an average annual + cycle. Parameters ---------- - dep_var : ILAMB.Variable.Variable - the dependent variable - ind_var : ILAMB.Variable.Variable - the independent variable - dataset : netCDF4.Dataset + obs : ILAMB.Variable.Variable + the observational (reference) variable + mod : ILAMB.Variable.Variable + the model (comparison) variable + regions : list of str, optional + the regions overwhich to apply the analysis + dataset : netCDF4.Dataset, optional a open dataset in write mode for caching the results of the analysis which pertain to the model - rname : str - the name of the relationship under study - regions : list of str, optional - a list of units over which to apply the analysis - dep_plot_unit,ind_plot_unit : str, optional - the name of the unit to use in the plots found on the HTML output - - """ - def _extractMaxTemporalOverlap(v1,v2): # should move? - t0 = max(v1.time.min(),v2.time.min()) - tf = min(v1.time.max(),v2.time.max()) - for v in [v1,v2]: - begin = np.argmin(np.abs(v.time-t0)) - end = np.argmin(np.abs(v.time-tf))+1 - v.time = v.time[begin:end] - v.data = v.data[begin:end,...] 
- mask = v1.data.mask + v2.data.mask - v1 = v1.data[mask==0].flatten() - v2 = v2.data[mask==0].flatten() - return v1,v2 - - # grab regions - regions = keywords.get("regions",["global"]) + benchmark_dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the observations + space_mean : bool, optional + disable to compute sums of the variable over space instead of + mean values + table_unit : str, optional + the unit to use when displaying output in tables on the HTML page + plots_unit : str, optional + the unit to use when displaying output on plots on the HTML page + + """ + from Variable import Variable + regions = keywords.get("regions" ,["global"]) + dataset = keywords.get("dataset" ,None) + benchmark_dataset = keywords.get("benchmark_dataset",None) + space_mean = keywords.get("space_mean" ,True) + table_unit = keywords.get("table_unit" ,None) + plot_unit = keywords.get("plot_unit" ,None) + mass_weighting = keywords.get("mass_weighting" ,False) + skip_rmse = keywords.get("skip_rmse" ,False) + skip_iav = keywords.get("skip_iav" ,False) + skip_cycle = keywords.get("skip_cycle" ,False) + ILAMBregions = Regions() + spatial = ref.spatial + + # Convert str types to booleans + if type(skip_rmse) == type(""): + skip_rmse = (skip_rmse.lower() == "true") + if type(skip_iav ) == type(""): + skip_iav = (skip_iav .lower() == "true") + if type(skip_cycle) == type(""): + skip_cycle = (skip_cycle.lower() == "true") - # convert to plot units - dep_plot_unit = keywords.get("dep_plot_unit",dep_var.unit) - ind_plot_unit = keywords.get("ind_plot_unit",ind_var.unit) - if dep_plot_unit is not None: dep_var.convert(dep_plot_unit) - if ind_plot_unit is not None: ind_var.convert(ind_plot_unit) - - # if the variables are temporal, we need to get period means - if dep_var.temporal: dep_var = dep_var.integrateInTime(mean=True) - if ind_var.temporal: ind_var = ind_var.integrateInTime(mean=True) - mask = dep_var.data.mask + ind_var.data.mask - - # analysis over regions - for region in regions: + # Check if we need to skip parts of the analysis + if not ref.monthly : skip_cycle = True + if ref.time.size < 12: skip_cycle = True + if ref.time.size == 1: skip_rmse = True + if skip_rmse : skip_iav = True + name = ref.name + + # Interpolate both reference and comparison to a grid composed of + # their cell breaks + ref.convert(plot_unit) + com.convert(plot_unit) + lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) + REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + unit = REF.unit + area = REF.area + ndata = REF.ndata + + # Find the mean values over the time period + ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) + com_timeint = com.integrateInTime(mean=True).convert(plot_unit) + REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) + COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) + normalizer = REF_timeint.data if mass_weighting else None + + # Report period mean values over all possible representations of + # land + ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) + ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) + com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) + if benchmark_dataset is not None: - lats,lons = ILAMBregions[region] - rmask = (np.outer((dep_var.lat>lats[0])*(dep_var.latlons[0])*(dep_var.lon 1 
else REF_timeint) + bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + if dataset is not None: + bias.name = "bias_map_of_%s" % name + bias.toNetCDF4(dataset,group="MeanState") + bias_score_map.name = "biasscore_map_of_%s" % name + bias_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) + bias_val.name = "Bias %s" % region + bias_val.toNetCDF4(dataset,group="MeanState") + bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + bias_score.name = "Bias Score %s" % region + bias_score.toNetCDF4(dataset,group="MeanState") + del bias,bias_score_map + + # Spatial mean: plots + if REF.time.size > 1: + if benchmark_dataset is not None: + for region in regions: + ref_spaceint = REF.integrateInSpace(region=region,mean=True) + ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + for region in regions: + com_spaceint = COM.integrateInSpace(region=region,mean=True) + com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + com_spaceint.toNetCDF4(dataset,group="MeanState") + + # RMSE: maps, scalars, and scores + if not skip_rmse: + rmse = REF.rmse(COM).convert(plot_unit) + del REF + cCOM = Variable(name = "centralized %s" % name, unit = unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM.area, ndata = COM.ndata).convert(plot_unit) + del COM + crmse = cREF.rmse(cCOM).convert(plot_unit) + del cREF + if skip_iav: del cCOM + rmse_score_map = Score(crmse,REF_iav) + if dataset is not None: + rmse.name = "rmse_map_of_%s" % name + rmse.toNetCDF4(dataset,group="MeanState") + rmse_score_map.name = "rmsescore_map_of_%s" % name + rmse_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) + rmse_val.name = "RMSE %s" % region + rmse_val.toNetCDF4(dataset,group="MeanState") + rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + rmse_score.name = "RMSE Score %s" % region + rmse_score.toNetCDF4(dataset,group="MeanState") + del rmse,crmse,rmse_score_map + + # IAV: maps, scalars, scores + if not skip_iav: + COM_iav = cCOM.rms() + del cCOM + iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit, + data = (COM_iav.data-REF_iav.data), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = area, ndata = ndata), + REF_iav) + if benchmark_dataset is not None: + REF_iav.name = "iav_map_of_%s" % name + REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + COM_iav.name = "iav_map_of_%s" % name + COM_iav.toNetCDF4(dataset,group="MeanState") + iav_score_map.name = "iavscore_map_of_%s" % name + iav_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + iav_score.name = "Interannual Variability Score %s" % region + iav_score.toNetCDF4(dataset,group="MeanState") + del COM_iav,iav_score_map + del REF_iav + + return def ClipTime(v,t0,tf): """Remove time from a variable based on input bounds. 
@@ -1300,10 +1531,10 @@ def MakeComparable(ref,com,**keywords): # If the reference is spatial, the comparison must be if ref.spatial and not com.spatial: - msg = "%s Datasets are not uniformly spatial: " % logstring - msg += "reference = %s, comparison = %s" % (ref.spatial,com.spatial) - logger.debug(msg) - raise VarsNotComparable() + ref = ref.extractDatasites(com.lat,com.lon) + msg = "%s The reference dataset is spatial but the comparison is site-based. " % logstring + msg += "Extracted %s sites from the reference to match the comparison." % ref.ndata + logger.info(msg) # If the reference is layered, the comparison must be if ref.layered and not com.layered: @@ -1383,7 +1614,7 @@ def MakeComparable(ref,com,**keywords): # comparison, coarsen the comparison if np.log10(ref.dt/com.dt) > 0.5: com = com.coarsenInTime(ref.time_bnds,window=window) - + # Time bounds of the reference dataset t0 = ref.time_bnds[ 0,0] tf = ref.time_bnds[-1,1] diff --git a/test/scores_test.csv.gold b/test/scores_test.csv.gold index 6d65ab7b..6fccfb93 100644 --- a/test/scores_test.csv.gold +++ b/test/scores_test.csv.gold @@ -1,9 +1,9 @@ Variables,CLM50r243CRUNCEP,CLM50r243GSWP3 -Biomass,0.595710463937,0.678304573522 -Gross Primary Productivity,0.753476728464,0.741270301037 -Global Net Ecosystem Carbon Balance,0.705400063727,0.863669079462 -Net Ecosystem Exchange,0.524058275106,0.504338904659 -Terrestrial Water Storage Anomaly,0.484015616221,0.470205924215 -Albedo,0.771776381299,0.774604472682 -Surface Air Temperature,0.988457088529,0.990624010352 -Precipitation,0.812343937554,0.824581872315 +Biomass,0.5957104653413856,0.6783045750117078 +Gross Primary Productivity,0.6217211297637607,0.6126273585798891 +Global Net Ecosystem Carbon Balance,0.7054000637266042,0.8636690794621101 +Net Ecosystem Exchange,0.3941918077804778,0.38120476926634617 +Terrestrial Water Storage Anomaly,0.7000653021257858,0.7269702240175762 +Albedo,0.5434663466148166,0.544587485316599 +Surface Air Temperature,0.9256731031865132,0.9314748385926337 +Precipitation,0.7555153501937276,0.7679655805094326 From 419b98f017237ed349601223ced5474e68218ecb Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Thu, 16 Aug 2018 16:50:03 -0600 Subject: [PATCH 16/22] update ILAMB subtree to v2.3 --- ilamb/Makefile | 35 - ilamb/ilamb/.gitignore | 15 - ilamb/ilamb/README.rst | 70 - ilamb/ilamb/bin/ilamb-doctor | 134 -- ilamb/ilamb/bin/ilamb-fetch | 97 -- ilamb/ilamb/bin/ilamb-mean | 177 -- ilamb/ilamb/bin/ilamb-run | 574 ------- ilamb/ilamb/bin/ilamb-table | 65 - ilamb/ilamb/demo/diurnal.cfg | 76 - ilamb/ilamb/demo/ilamb.cfg | 544 ------ ilamb/ilamb/demo/iomb.cfg | 65 - ilamb/ilamb/demo/sample.cfg | 18 - ilamb/ilamb/doc/Makefile | 186 --- ilamb/ilamb/doc/_templates/class.rst | 25 - ilamb/ilamb/doc/add_data.rst | 150 -- ilamb/ilamb/doc/add_model.rst | 117 -- ilamb/ilamb/doc/conf.py | 357 ---- ilamb/ilamb/doc/confront.rst | 294 ---- ilamb/ilamb/doc/custom_regions.rst | 124 -- ilamb/ilamb/doc/first_steps.rst | 233 --- ilamb/ilamb/doc/format_data.rst | 220 --- ilamb/ilamb/doc/ilamb_doctor.rst | 61 - ilamb/ilamb/doc/ilamb_fetch.rst | 37 - ilamb/ilamb/doc/ilamb_run.rst | 177 -- ilamb/ilamb/doc/index.rst | 39 - ilamb/ilamb/doc/install.rst | 278 ---- ilamb/ilamb/doc/overview.rst | 203 --- ilamb/ilamb/doc/packageapi.rst | 132 -- ilamb/ilamb/doc/tutorial.rst | 39 - ilamb/ilamb/setup.py | 109 -- ilamb/ilamb/src/ILAMB/ConfDiurnal.py | 199 --- ilamb/ilamb/src/ILAMB/ConfEvapFraction.py | 65 - ilamb/ilamb/src/ILAMB/ConfIOMB.py | 613 ------- ilamb/ilamb/src/ILAMB/ConfNBP.py | 172 -- 
ilamb/ilamb/src/ILAMB/ConfPermafrost.py | 223 --- ilamb/ilamb/src/ILAMB/ConfRunoff.py | 215 --- ilamb/ilamb/src/ILAMB/ConfTWSA.py | 247 --- ilamb/ilamb/src/ILAMB/Confrontation.py | 1185 -------------- ilamb/ilamb/src/ILAMB/ModelResult.py | 344 ---- ilamb/ilamb/src/ILAMB/Post.py | 1048 ------------ ilamb/ilamb/src/ILAMB/Regions.py | 193 --- ilamb/ilamb/src/ILAMB/Scoreboard.py | 856 ---------- ilamb/ilamb/src/ILAMB/Variable.py | 1761 -------------------- ilamb/ilamb/src/ILAMB/__init__.py | 35 - ilamb/ilamb/src/ILAMB/constants.py | 224 --- ilamb/ilamb/src/ILAMB/ilamblib.py | 1821 --------------------- ilamb/ilamb/test/makefile | 15 - ilamb/ilamb/test/score_diff.py | 18 - ilamb/ilamb/test/scores_test.csv.gold | 9 - ilamb/ilamb/test/test.cfg | 134 -- ilamb/ilamb/test/test_Variable.py | 157 -- ilamb/ilamb/test/test_run_script.py | 17 - 52 files changed, 14202 deletions(-) delete mode 100644 ilamb/Makefile delete mode 100644 ilamb/ilamb/.gitignore delete mode 100644 ilamb/ilamb/README.rst delete mode 100644 ilamb/ilamb/bin/ilamb-doctor delete mode 100644 ilamb/ilamb/bin/ilamb-fetch delete mode 100644 ilamb/ilamb/bin/ilamb-mean delete mode 100644 ilamb/ilamb/bin/ilamb-run delete mode 100644 ilamb/ilamb/bin/ilamb-table delete mode 100644 ilamb/ilamb/demo/diurnal.cfg delete mode 100644 ilamb/ilamb/demo/ilamb.cfg delete mode 100644 ilamb/ilamb/demo/iomb.cfg delete mode 100644 ilamb/ilamb/demo/sample.cfg delete mode 100644 ilamb/ilamb/doc/Makefile delete mode 100644 ilamb/ilamb/doc/_templates/class.rst delete mode 100644 ilamb/ilamb/doc/add_data.rst delete mode 100644 ilamb/ilamb/doc/add_model.rst delete mode 100644 ilamb/ilamb/doc/conf.py delete mode 100644 ilamb/ilamb/doc/confront.rst delete mode 100644 ilamb/ilamb/doc/custom_regions.rst delete mode 100644 ilamb/ilamb/doc/first_steps.rst delete mode 100644 ilamb/ilamb/doc/format_data.rst delete mode 100644 ilamb/ilamb/doc/ilamb_doctor.rst delete mode 100644 ilamb/ilamb/doc/ilamb_fetch.rst delete mode 100644 ilamb/ilamb/doc/ilamb_run.rst delete mode 100644 ilamb/ilamb/doc/index.rst delete mode 100644 ilamb/ilamb/doc/install.rst delete mode 100644 ilamb/ilamb/doc/overview.rst delete mode 100644 ilamb/ilamb/doc/packageapi.rst delete mode 100644 ilamb/ilamb/doc/tutorial.rst delete mode 100644 ilamb/ilamb/setup.py delete mode 100644 ilamb/ilamb/src/ILAMB/ConfDiurnal.py delete mode 100644 ilamb/ilamb/src/ILAMB/ConfEvapFraction.py delete mode 100644 ilamb/ilamb/src/ILAMB/ConfIOMB.py delete mode 100644 ilamb/ilamb/src/ILAMB/ConfNBP.py delete mode 100644 ilamb/ilamb/src/ILAMB/ConfPermafrost.py delete mode 100644 ilamb/ilamb/src/ILAMB/ConfRunoff.py delete mode 100644 ilamb/ilamb/src/ILAMB/ConfTWSA.py delete mode 100644 ilamb/ilamb/src/ILAMB/Confrontation.py delete mode 100644 ilamb/ilamb/src/ILAMB/ModelResult.py delete mode 100644 ilamb/ilamb/src/ILAMB/Post.py delete mode 100644 ilamb/ilamb/src/ILAMB/Regions.py delete mode 100644 ilamb/ilamb/src/ILAMB/Scoreboard.py delete mode 100644 ilamb/ilamb/src/ILAMB/Variable.py delete mode 100644 ilamb/ilamb/src/ILAMB/__init__.py delete mode 100644 ilamb/ilamb/src/ILAMB/constants.py delete mode 100644 ilamb/ilamb/src/ILAMB/ilamblib.py delete mode 100644 ilamb/ilamb/test/makefile delete mode 100644 ilamb/ilamb/test/score_diff.py delete mode 100644 ilamb/ilamb/test/scores_test.csv.gold delete mode 100644 ilamb/ilamb/test/test.cfg delete mode 100644 ilamb/ilamb/test/test_Variable.py delete mode 100644 ilamb/ilamb/test/test_run_script.py diff --git a/ilamb/Makefile b/ilamb/Makefile deleted file mode 100644 index 
9ec599b3..00000000 --- a/ilamb/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -all : develop - -test : FORCE - cd ilamb/test; python -c "import ILAMB; print ILAMB.__version__" - -develop : install - -# NOTE: need to cd to ilamb dir first before setup because -# the Makefile to build and install in the virtualenv is -# different from the distibution Makefile. - -install : FORCE - cd ilamb; python setup.py install - -# -clean : - -rm -f *~ *.CKP *.ln *.BAK *.bak .*.bak \ - core errs \ - ,* .emacs_* \ - tags TAGS \ - make.log MakeOut \ - *.tmp tmp.txt - -# -# clobber - Really clean up the directory. -# -clobber : clean - -rm -f .Makedepend *.o *.mod *.il *.pyc - -rm -rf ilamb/*.egg-info - -rm -rf ilamb/build - -# -# FORCE - Null rule to force things to happen. -# -FORCE : diff --git a/ilamb/ilamb/.gitignore b/ilamb/ilamb/.gitignore deleted file mode 100644 index e1b1aa8a..00000000 --- a/ilamb/ilamb/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -.gitignore -RDict.db -RDict.log -*.pyc -*~ -*.orig -#*# -*# -*.DS_Store -*build* -*graveyard* -*.egg-info -dist -*_generated* -src/ILAMB/generated_version.py \ No newline at end of file diff --git a/ilamb/ilamb/README.rst b/ilamb/ilamb/README.rst deleted file mode 100644 index ebbe75ee..00000000 --- a/ilamb/ilamb/README.rst +++ /dev/null @@ -1,70 +0,0 @@ -The ILAMB Benchmarking System -============================= - -The International Land Model Benchmarking (ILAMB) project is a -model-data intercomparison and integration project designed to improve -the performance of land models and, in parallel, improve the design of -new measurement campaigns to reduce uncertainties associated with key -land surface processes. Building upon past model evaluation studies, -the goals of ILAMB are to: - -* develop internationally accepted benchmarks for land model - performance, promote the use of these benchmarks by the - international community for model intercomparison, -* strengthen linkages between experimental, remote sensing, and - climate modeling communities in the design of new model tests and - new measurement programs, and -* support the design and development of a new, open source, - benchmarking software system for use by the international community. - -It is the last of these goals to which this repository is -concerned. We have developed a python-based generic benchmarking -system, initially focused on assessing land model performance. - -Useful Information ------------------- - -* `Documentation `_ - installation and - basic usage tutorials -* Sample Output - - * `CLM `_ - land comparison against 3 CLM versions and 2 forcings - * `CMIP5 `_ - land comparison against a collection of CMIP5 models - * `IOMB `_ - ocean comparison against a few ocean models - -* Paper `preprint `_ which - details the design and methodology employed in the ILAMB package -* If you find the package or the ouput helpful in your research or - development efforts, we kindly ask you to cite the following - reference (DOI:10.18139/ILAMB.v002.00/1251621). - -ILAMB 2.3 Release ------------------ - -We are pleased to announce version 2.3 of the ILAMB python -package. Among many bugfixes and improvements we highlight these major -changes: - -* You may observe a large shift in some score values. In this version - we solidified our scoring methodology while writing a `paper - `_ which necesitated - reworking some of the scores. For details, see the linked paper. -* Made a memory optimization pass through the analysis routines. 
Peak - memory usage and the time at peak was reduced improving performance. -* Restructured the symbolic manipulation of derived variables to - greatly reduce the required memory. -* Moved from using cfunits to cf_units. Both are python wrappers - around the UDUNITS library, but cfunits is stagnant and placed a - lower limit to the version of the netCDF4 python wrappers we could - use. -* The scoring of the interannual variability was missed in the port - from version 1 to 2, we have added the metric. -* The terrestrial water storage anomaly GRACE metric was changed to - compare mean anomaly values over large river basins. For details see - the ILAMB paper. - - -Funding -------- - -This research was performed for the *Reducing Uncertainties in Biogeochemical Interactions through Synthesis and Computation* (RUBISCO) Scientific Focus Area, which is sponsored by the Regional and Global Climate Modeling (RGCM) Program in the Climate and Environmental Sciences Division (CESD) of the Biological and Environmental Research (BER) Program in the U.S. Department of Energy Office of Science. diff --git a/ilamb/ilamb/bin/ilamb-doctor b/ilamb/ilamb/bin/ilamb-doctor deleted file mode 100644 index cceaa17e..00000000 --- a/ilamb/ilamb/bin/ilamb-doctor +++ /dev/null @@ -1,134 +0,0 @@ -#!/usr/bin/env python -""" -""" -from ILAMB.ModelResult import ModelResult -from ILAMB.Scoreboard import Scoreboard -from ILAMB.Regions import Regions -from ILAMB import ilamblib as il -import os,time,sys,argparse -import numpy as np -import datetime,glob - -# Some color constants for printing to the terminal -OK = '\033[92m' -FAIL = '\033[91m' -ENDC = '\033[0m' - -def InitializeModels(model_root,models=[],verbose=False,filter="",model_year=[]): - """Initializes a list of models - - Initializes a list of models where each model is the subdirectory - beneath the given model root directory. The global list of models - will exist on each processor. 
- - Parameters - ---------- - model_root : str - the directory whose subdirectories will become the model results - models : list of str, optional - only initialize a model whose name is in this list - verbose : bool, optional - enable to print information to the screen - model_year : 2-tuple, optional - shift model years from the first to the second part of the tuple - - Returns - ------- - M : list of ILAMB.ModelResults.ModelsResults - a list of the model results, sorted alphabetically by name - - """ - # initialize the models - M = [] - if len(model_year) != 2: model_year = None - max_model_name_len = 0 - if verbose: print "\nSearching for model results in %s\n" % model_root - for subdir, dirs, files in os.walk(model_root): - for mname in dirs: - if len(models) > 0 and mname not in models: continue - M.append(ModelResult(os.path.join(subdir,mname), modelname = mname, filter=filter, model_year = model_year)) - max_model_name_len = max(max_model_name_len,len(mname)) - break - M = sorted(M,key=lambda m: m.name.upper()) - - # assign unique colors - clrs = il.GenerateDistinctColors(len(M)) - for m in M: - clr = clrs.pop(0) - m.color = clr - - # optionally output models which were found - if verbose: - for m in M: - print (" {0:>45}").format(m.name) - - if len(M) == 0: - if verbose: print "No model results found" - sys.exit(1) - - return M - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('--model_root', dest="model_root", metavar='root', type=str, nargs=1, default=["./"], - help='root at which to search for models') -parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, - help='path to configuration file to use') -parser.add_argument('--models', dest="models", metavar='m', type=str, nargs='+',default=[], - help='specify which models to run, list model names with no quotes and only separated by a space.') -parser.add_argument('--model_year', dest="model_year", metavar='y0 yf', type=int, nargs='+',default=[], - help='set to shift model years, "--model_year y0 yf" will shift years from y0 to yf') -parser.add_argument('--confrontations', dest="confront", metavar='c', type=str, nargs='+',default=[], - help='specify which confrontations to run, list confrontation names with no quotes and only separated by a space.') -parser.add_argument('-q','--quiet', dest="quiet", action="store_true", - help='enable to silence screen output') -parser.add_argument('--filter', dest="filter", metavar='filter', type=str, nargs=1, default=[""], - help='a string which much be in the model filenames') -parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], - help='path of where to save the output') - -args = parser.parse_args() -if args.config is None: - print "\nError: You must specify a configuration file using the option --config\n" - sys.exit(1) - -M = InitializeModels(args.model_root[0],args.models,not args.quiet,filter=args.filter[0],model_year=args.model_year) -S = Scoreboard(args.config[0], - verbose = False, - build_dir = args.build_dir[0]) - -max_name_len = 45 -max_m_len = 0 -for m in M: max_m_len = max(max_m_len,len(m.name)) - -print """ -We will now look in each model for the variables in the ILAMB -configure file you specified (%s). The color green is used to reflect -which variables were found in the model. 
The color red is used to -reflect that a model is missing a required variable.\n""" % (args.config[0]) -for m in M: - for c in S.list(): - ands,ors = c.requires() - ok = False - if len(ands) == 0: - tf = [m.variables.has_key(v) for v in ors] - ok = any(tf) - out = [("\033[92m%s\033[0m" % v) if t else v for v,t in zip(ors,tf)] - if len(out) > 1: out[-1] = "or %s" % out[-1] - if len(out) <= 2: - out = " " .join(out) - else: - out = ", ".join(out) - else: - tf = [m.variables.has_key(v) for v in ands] - ok = all(tf) - out = [("\033[92m%s\033[0m" % v) if t else ("\033[91m%s\033[0m" % v) for v,t in zip(ands,tf)] - if len(out) > 1: out[-1] = "and %s" % out[-1] - if len(out) <= 2: - out = " " .join(out) - else: - out = ", ".join(out) - - if ok: - print (" {0:>%d}\033[92m {1:<%d}\033[0m %s" % (max_name_len,max_m_len,out)).format(c.longname,m.name) - else: - print (" {0:>%d}\033[91m {1:<%d}\033[0m %s" % (max_name_len,max_m_len,out)).format(c.longname,m.name) diff --git a/ilamb/ilamb/bin/ilamb-fetch b/ilamb/ilamb/bin/ilamb-fetch deleted file mode 100644 index 9ad1c781..00000000 --- a/ilamb/ilamb/bin/ilamb-fetch +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python -import hashlib,argparse,os,sys,urllib - -def BuildDirectories(filepath): - d = os.path.dirname(filepath) - if not os.path.isdir(d): - os.makedirs(d) - -def filehash(filepath): - blocksize = 64*1024 - sha = hashlib.sha1() - with open(filepath, 'rb') as fp: - while True: - data = fp.read(blocksize) - if not data: - break - sha.update(data) - return sha.hexdigest() - -def GenerateSha1sumFile(root,suffix=".nc"): - - lines = "" - for topdir, dirs, files in os.walk(root): - for fpath in [os.path.join(topdir, f) for f in files]: - if not fpath.endswith(suffix): continue - size = os.path.getsize(fpath) - sha = filehash(fpath) - name = os.path.relpath(fpath, root) - lines += '%s %s\n' % (sha,name) - return lines - -def CheckSha1sumFile(sha1sumfile,root): - - needs_updating = [] - with file(sha1sumfile) as f: - lines = f.readlines() - for line in lines: - line = line.split() - sha1sum,filename = line - fpath = os.path.join(root,filename) - if os.path.isfile(fpath): - if sha1sum != filehash(fpath): needs_updating.append(filename) - else: - needs_updating.append(filename) - return needs_updating - -# default value is ILAMB_ROOT if set -local_root = "./" -if os.environ.has_key("ILAMB_ROOT"): local_root = os.environ["ILAMB_ROOT"] - -# parse options -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('--local_root', dest="local_root", metavar="PATH", type=str, default=local_root, - help='Location on your system.') -parser.add_argument('--remote_root', dest="remote_root", metavar="PATH", type=str, default="http://ilamb.ornl.gov/ILAMB-Data/", - help='Location on the remote system.') -parser.add_argument('-c', '--create', dest="create", action="store_true", - help='Enable to create a sha1sum check file of the contents of the local root') -args = parser.parse_args() - -# use create mode if you want to make a checksum file of a directory -if args.create: - f = file(args.local_root + "/SHA1SUM",mode="w") - f.write(GenerateSha1sumFile(args.local_root)) - f.close() - sys.exit() - -print "\nComparing remote location:\n\n\t%s\n\nTo local location:\n\n\t%s" % (args.remote_root,args.local_root) - -# download and build the sha1sum check files -urllib.urlretrieve(args.remote_root + "/SHA1SUM", - args.local_root + "/SHA1SUM") -if "404 Not Found" in file(args.local_root + "/SHA1SUM").read(): - raise ValueError("Could not find the sha1 sum file: 
%s" % (args.remote_root + "/SHA1SUM")) -needs_updating = CheckSha1sumFile(args.local_root + "/SHA1SUM",args.local_root) - -if len(needs_updating) == 0: - print "\nAll your data is up-to-date and clean.\n" - os.system("rm -f " + args.local_root + "/SHA1SUM") - sys.exit() - -print "\nI found the following files which are missing, out of date, or corrupt:\n" -for key in needs_updating: - print "\t%s/%s" % (args.local_root,key) - -reply = str(raw_input('\nDownload replacements? [y/n] ')).lower().strip() -if reply[0] == 'y': - print " " - for key in needs_updating: - print "\tDownloading %s/%s..." % (args.remote_root,key) - BuildDirectories(args.local_root + "/" + key) - urllib.urlretrieve(args.remote_root + "/" + key, - args.local_root + "/" + key) - print "\nDownload complete. Rerun ilamb-fetch to check file integrity.\n" - -os.system("rm -f " + args.local_root + "/SHA1SUM") - diff --git a/ilamb/ilamb/bin/ilamb-mean b/ilamb/ilamb/bin/ilamb-mean deleted file mode 100644 index e3e02681..00000000 --- a/ilamb/ilamb/bin/ilamb-mean +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python -""" -Computes a multimodel mean. -""" -import os,sys,argparse -from ILAMB.ModelResult import ModelResult -from ILAMB.Variable import Variable -from netCDF4 import Dataset -import numpy as np - -def InitializeModels(model_root,models=[],verbose=False,filter="",model_year=[]): - """Initializes a list of models - - Initializes a list of models where each model is the subdirectory - beneath the given model root directory. The global list of models - will exist on each processor. - - Parameters - ---------- - model_root : str - the directory whose subdirectories will become the model results - models : list of str, optional - only initialize a model whose name is in this list - verbose : bool, optional - enable to print information to the screen - model_year : 2-tuple, optional - shift model years from the first to the second part of the tuple - - Returns - ------- - M : list of ILAMB.ModelResults.ModelsResults - a list of the model results, sorted alphabetically by name - - """ - # initialize the models - M = [] - if len(model_year) != 2: model_year = None - max_model_name_len = 0 - if verbose: print "\nSearching for model results in %s\n" % model_root - for subdir, dirs, files in os.walk(model_root): - for mname in dirs: - if len(models) > 0 and mname not in models: continue - M.append(ModelResult(os.path.join(subdir,mname), modelname = mname, filter=filter, model_year = model_year)) - max_model_name_len = max(max_model_name_len,len(mname)) - break - M = sorted(M,key=lambda m: m.name.upper()) - - # optionally output models which were found - if verbose: - for m in M: - print (" {0:>45}").format(m.name) - - if len(M) == 0: - if verbose: print "No model results found" - sys.exit(0) - - return M - - -def CombineModelVars(var,res=1.0): - """ - - """ - m0 = var.keys()[0] - t0 = -1e20; tf = +1e20 - lat0 = -1e20; latf = +1e20 - lon0 = -1e20; lonf = +1e20 - dep0 = -1e20; depf = +1e20 - for m in var: - var[m].convert(var[m0].unit) - assert var[m].data.ndim == var[m0].data.ndim - if var[m].time is not None: - t0 = max(var[m].time_bnds.min(),t0) - tf = min(var[m].time_bnds.max(),tf) - if var[m].lat is not None: - lat0 = max(var[m].lat_bnds.min(),lat0) - latf = min(var[m].lat_bnds.max(),latf) - if var[m].lon is not None: - lon0 = max(var[m].lon_bnds.min(),lon0) - lonf = min(var[m].lon_bnds.max(),lonf) - if var[m].depth is not None: - dep0 = max(var[m].depth_bnds.min(),dep0) - depf = min(var[m].depth_bnds.max(),depf) - lat0 = 
max(lat0,- 90.); latf = min(latf,+ 90.) - lon0 = max(lon0,-180.); lonf = min(lonf,+180.) - - # Create space/time grid - var[m0].trim(t=[t0,tf]) - t = np.copy(var[m0].time) - t_bnds = np.copy(var[m0].time_bnds) - lat_bnds = np.arange(lat0,latf+res/2.,res) - lon_bnds = np.arange(lon0,lonf+res/2.,res) - lat = 0.5*(lat_bnds[:-1]+lat_bnds[1:]) - lon = 0.5*(lon_bnds[:-1]+lon_bnds[1:]) - lat_bnd = np.zeros((lat.size,2)) - lon_bnd = np.zeros((lon.size,2)) - lat_bnd[:,0] = lat_bnds[:-1]; lat_bnd[:,1] = lat_bnds[+1:] - lon_bnd[:,0] = lon_bnds[:-1]; lon_bnd[:,1] = lon_bnds[+1:] - - shp = () - if t0 > -1e20: shp += (t .size,) - if lat0 > -1e20: shp += (lat.size,) - if lon0 > -1e20: shp += (lon.size,) - dsum = np.zeros(shp) - count = np.zeros(shp,dtype=int) - - for m in var: - print " Averaging in %s..." % m - intv = var[m].interpolate(time=t,lat=lat,lon=lon) - dsum += (intv.data.mask==0)*intv.data - count += (intv.data.mask==0) - - dsum = np.ma.masked_array(dsum,mask=(count==0)) - dsum /= count.clip(1) - return Variable(data = dsum, - unit = var[m0].unit, - name = var[m0].name, - time = t, - time_bnds = t_bnds, - lat = lat, - lat_bnds = lat_bnd, - lon = lon, - lon_bnds = lon_bnd) - -def CreateMeanModel(M,res=1.0): - """ - - """ - def _keep(v): - for keep in ["_bnds","time","lat","lon","layer","depth","lev","areacella"]: - if keep in v: return False - return True - - # Find a list of variables across all models - Vs = [] - for m in M: Vs += [v for v in m.variables.keys() if ((v not in Vs) and (_keep(v)))] - - # Create space/time grid - lat_bnds = np.arange(- 90, 90+res/2.,res) - lon_bnds = np.arange(-180,180+res/2.,res) - lat = 0.5*(lat_bnds[:-1]+lat_bnds[1:]) - lon = 0.5*(lon_bnds[:-1]+lon_bnds[1:]) - lat_bnd = np.zeros((lat.size,2)) - lon_bnd = np.zeros((lon.size,2)) - lat_bnd[:,0] = lat_bnds[:-1]; lat_bnd[:,1] = lat_bnds[+1:] - lon_bnd[:,0] = lon_bnds[:-1]; lon_bnd[:,1] = lon_bnds[+1:] - - for v in Vs: - print v - var = {} - try: - for m in M: - if not m.variables.has_key(v): continue - print " Reading from %s..." % m.name - var[m.name] = m.extractTimeSeries(v) - if len(var) == 1: raise ValueError - mean = CombineModelVars(var) - with Dataset("%s_MeanModel.nc" % v,mode="w") as dset: - mean.toNetCDF4(dset) - print "Writing %s_MeanModel.nc...\n" % v - except: - print "Failed to create %s\n" % v - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('--model_root', dest="model_root", metavar='root', type=str, nargs=1, default=["./"], - help='root at which to search for models') -parser.add_argument('--models', dest="models", metavar='m', type=str, nargs='+',default=[], - help='specify which models to run, list model names with no quotes and only separated by a space.') -parser.add_argument('--filter', dest="filter", metavar='filter', type=str, nargs=1, default=[""], - help='a string which much be in the model filenames') -parser.add_argument('-q','--quiet', dest="quiet", action="store_true", - help='enable to silence screen output') - -args = parser.parse_args() - -M = InitializeModels(args.model_root[0],args.models,not args.quiet,filter=args.filter[0]) -CreateMeanModel(M) diff --git a/ilamb/ilamb/bin/ilamb-run b/ilamb/ilamb/bin/ilamb-run deleted file mode 100644 index 03f7b9d5..00000000 --- a/ilamb/ilamb/bin/ilamb-run +++ /dev/null @@ -1,574 +0,0 @@ -#!/usr/bin/env python -""" -Runs an ILAMB study. 
-""" -import logging -from ILAMB.ModelResult import ModelResult -from ILAMB.Scoreboard import Scoreboard -from ILAMB.Regions import Regions -from ILAMB import ilamblib as il -from traceback import format_exc -import os,time,sys,argparse -from mpi4py import MPI -import numpy as np -import datetime,glob - -# MPI stuff -comm = MPI.COMM_WORLD -size = comm.Get_size() -rank = comm.Get_rank() -proc = np.zeros(size) - -# Some color constants for printing to the terminal -OK = '\033[92m' -FAIL = '\033[91m' -ENDC = '\033[0m' - -def InitializeModels(model_root,models=[],verbose=False,filter="",model_year=[]): - """Initializes a list of models - - Initializes a list of models where each model is the subdirectory - beneath the given model root directory. The global list of models - will exist on each processor. - - Parameters - ---------- - model_root : str - the directory whose subdirectories will become the model results - models : list of str, optional - only initialize a model whose name is in this list - verbose : bool, optional - enable to print information to the screen - model_year : 2-tuple, optional - shift model years from the first to the second part of the tuple - - Returns - ------- - M : list of ILAMB.ModelResults.ModelsResults - a list of the model results, sorted alphabetically by name - - """ - # initialize the models - M = [] - if len(model_year) != 2: model_year = None - max_model_name_len = 0 - if rank == 0 and verbose: print "\nSearching for model results in %s\n" % model_root - for subdir, dirs, files in os.walk(model_root): - for mname in dirs: - if len(models) > 0 and mname not in models: continue - M.append(ModelResult(os.path.join(subdir,mname), modelname = mname, filter=filter, model_year = model_year)) - max_model_name_len = max(max_model_name_len,len(mname)) - break - M = sorted(M,key=lambda m: m.name.upper()) - - # assign unique colors - clrs = il.GenerateDistinctColors(len(M)) - for m in M: - clr = clrs.pop(0) - m.color = clr - - # optionally output models which were found - if rank == 0 and verbose: - for m in M: - print (" {0:>45}").format(m.name) - - if len(M) == 0: - if verbose and rank == 0: print "No model results found" - comm.Barrier() - comm.Abort(0) - - return M - -def ParseModelSetup(model_setup,models=[],verbose=False,filter=""): - """Initializes a list of models - - Initializes a list of models where each model is the subdirectory - beneath the given model root directory. The global list of models - will exist on each processor. 
- - Parameters - ---------- - model_setup : str - the directory whose subdirectories will become the model results - models : list of str, optional - only initialize a model whose name is in this list - verbose : bool, optional - enable to print information to the screen - - Returns - ------- - M : list of ILAMB.ModelResults.ModelsResults - a list of the model results, sorted alphabetically by name - - """ - # initialize the models - M = [] - max_model_name_len = 0 - if rank == 0 and verbose: print "\nSetting up model results from %s\n" % model_setup - with file(model_setup) as f: - for line in f.readlines(): - if line.strip().startswith("#"): continue - line = line.split(",") - mname = None - mdir = None - model_year = None - if len(line) >= 2: - mname = line[0].strip() - mdir = line[1].strip() - # if mdir not a directory, then maybe path is relative to ILAMB_ROOT - if not os.path.isdir(mdir): - mdir = os.path.join(os.environ["ILAMB_ROOT"],mdir).strip() - if len(line) == 4: - model_year = [float(line[2].strip()),float(line[3].strip())] - max_model_name_len = max(max_model_name_len,len(mname)) - if (len(models) > 0 and mname not in models) or (mname is None): continue - M.append(ModelResult(mdir, modelname = mname, filter=filter, model_year = model_year)) - - #M = sorted(M,key=lambda m: m.name.upper()) - - # assign unique colors - clrs = il.GenerateDistinctColors(len(M)) - for m in M: - clr = clrs.pop(0) - m.color = clr - - # optionally output models which were found - if rank == 0 and verbose: - for m in M: - print (" {0:>45}").format(m.name) - - if len(M) == 0: - if verbose and rank == 0: print "No model results found" - comm.Barrier() - comm.Abort(0) - - return M - -def InitializeRegions(filenames): - """Initialize regions from a list of files. - - If the file is a netCDF4 file, see documentation in - ILAMB.Regions.addRegionNetCDF4 for details on the required - format. If the file defines regions by latitude/longitude bounds, - then we anticipate comma delimited rows in the following form: - - shortname, longname, min lat, max lat, min lon, max lon - - Note that latitudes should be on (-90,90) and longitudes on - (-180,180). - - Parameters - ---------- - filenames : list of str - a list of files from which to search for regions - - """ - r = Regions() - for filename in filenames: - try: - r.addRegionNetCDF4(filename) - except IOError: - for line in file(filename): - line = line.strip() - if line.startswith("#"): continue - line = line.split(",") - if len(line) == 6: - r.addRegionLatLonBounds(line[0].strip(), - line[1].strip(), - [float(line[2]),float(line[3])], - [float(line[4]),float(line[5])]) - -def MatchRelationshipConfrontation(C): - """Match relationship strings to confrontation longnames - - We allow for relationships to be studied by specifying the - confrontation longname in the configure file. This routine loops - over all defined relationships and finds the matching - confrontation. 
(NOTE: this really belongs inside the Scoreboard - object) - - Parameters - ---------- - C : list of ILAMB.Confrontation.Confrontation - the confrontation list - - Returns - ------- - C : list of ILAMB.Confrontation.Confrontation - the same list with relationships linked to confrontations - """ - for c in C: - if c.relationships is None: continue - for i,longname in enumerate(c.relationships): - found = False - for cor in C: - if longname.lower() == cor.longname.lower(): - c.relationships[i] = cor - found = True - return C - -def FilterConfrontationList(C,match_list): - """Filter the confrontation list - - Filter the confrontation list by requiring that at least one - string in the input list is found in the longname in the - confrontation. - - Parameters - ---------- - C : list of ILAMB.Confrontation.Confrontation - the source list of confrontations - match_list : list of str - the list of strings - - Returns - ------- - Cf : list of ILAMB.Confrontation.Confrontation - the list of filtered confrontations - """ - if len(match_list) == 0: return C - Cf = [] - for c in C: - for match in match_list: - if match in c.longname: Cf.append(c) - return Cf - -def BuildLocalWorkList(M,C): - """Build the local work list - - We enumerate a list of work by taking combinations of model - results and confrontations. This list is partitioned evenly among - processes preferring to cluster as many confrontations with the - same name together. While the work of the model-confrontation pair - is local, some post-processing operations need performed once per - confrontation. Thus we also need to flag one instance of each - confrontation as the master process. - - Parameters - ---------- - M : list of ILAMB.ModelResult.ModelResult - list of models to analyze - C : list of ILAMB.Confrontation.Confrontation - list of confrontations - - Returns - ------- - localW : list of (ILAMB.ModelResult.ModelResult, ILAMB.Confrontation.Confrontation) tuples - the work local to this process - """ - - # Evenly divide up the work among processes - W = [] - for c in C: - for m in M: - W.append([m,c]) - wpp = float(len(W))/size - begin = int(round( rank *wpp)) - end = int(round((rank+1)*wpp)) - localW = W[begin:end] - - # Determine who is the master of each confrontation - for c in C: - sendbuf = np.zeros(size,dtype='int') - for w in localW: - if c is w[1]: sendbuf[rank] += 1 - recvbuf = None - if rank == 0: recvbuf = np.empty([size, sendbuf.size],dtype='int') - comm.Gather(sendbuf,recvbuf,root=0) - if rank == 0: - numc = recvbuf.sum(axis=1) - else: - numc = np.empty(size,dtype='int') - comm.Bcast(numc,root=0) - if rank == numc.argmax(): - c.master = True - else: - c.master = False - - return localW - -def WorkConfront(W,verbose=False,clean=False): - """Performs the confrontation analysis - - For each model-confrontation pair (m,c) in the input work list, - this routine will call c.confront(m) and keep track of the time - required as well as any exceptions which are thrown. 
- - Parameters - ---------- - W : list of (ILAMB.ModelResult.ModelResult, ILAMB.Confrontation.Confrontation) tuples - the list of work - verbose : bool, optional - enable to print output to the screen monitoring progress - clean : bool, optional - enable to perform the confrontation again, overwriting previous results - - """ - maxCL = 45; maxML = 20 - - # Run analysis on your local work model-confrontation pairs - for w in W: - m,c = w - - # if the results file exists, skip this confrontation unless we want to clean - if os.path.isfile(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name))) and clean is False: - if verbose: - print (" {0:>%d} {1:<%d} %sUsingCachedData%s " % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name) - sys.stdout.flush() - continue - - # try to run the confrontation - try: - t0 = time.time() - c.confront(m) - dt = time.time()-t0 - proc[rank] += dt - if verbose: - print (" {0:>%d} {1:<%d} %sCompleted%s {2:>5.1f} s" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,dt) - sys.stdout.flush() - - # if things do not work out, print the exception so the user has some idea - except Exception as ex: - logger.debug("[%s][%s]\n%s" % (c.longname,m.name,format_exc())) - if verbose: - print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) - -def WorkPost(M,C,W,S,verbose=False,skip_plots=False): - """Performs the post-processing - - Determines plot limits across all models, makes plots, generates - other forms of HTML output. - - Parameters - ---------- - M : list of ILAMB.ModelResult.ModelResult - list of models to analyze - C : list of ILAMB.Confrontation.Confrontation - list of confrontations - W : list of (ILAMB.ModelResult.ModelResult, ILAMB.Confrontation.Confrontation) tuples - the list of work - S : ILAMB.Scoreboard.Scoreboard - the scoreboard context - verbose : bool, optional - enable to print output to the screen monitoring progress - skip_plots : bool, optional - enable to skip plotting - """ - maxCL = 45; maxML = 20 - - # work done on just the master confrontation - for c in C: c.determinePlotLimits() - - for w in W: - m,c = w - try: - t0 = time.time() - if not skip_plots: - c.modelPlots(m) - c.sitePlots(m) - c.computeOverallScore(m) - dt = time.time()-t0 - proc[rank] += dt - if verbose: - print (" {0:>%d} {1:<%d} %sCompleted%s {2:>5.1f} s" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,dt) - sys.stdout.flush() - except Exception as ex: - logger.debug("[%s][%s]\n%s" % (c.longname,m.name,format_exc())) - if verbose: - print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) - sys.stdout.flush() - - comm.Barrier() - for c in C: - if not skip_plots: - try: - c.compositePlots() - except Exception as ex: - logger.debug("[compositePlots][%s]\n%s" % (c.longname,format_exc())) - c.generateHtml() - - comm.Barrier() - if rank==0: - S.createHtml(M) - S.createSummaryFigure(M) - -def RestrictiveModelExtents(M,eps=2.): - extents0 = np.asarray([[-90.,+90.],[-180.,+180.]]) - extents = extents0.copy() - for m in M: - for i in range(2): - extents[i,0] = max(extents[i,0],m.extents[i,0]) - extents[i,1] = min(extents[i,1],m.extents[i,1]) - diff = np.abs(extents0-extents) - extents = (diff<=eps)*extents0 + (diff>eps)*extents - return extents - -class MPIStream(): - """ - The MPI.File stream doesn't have the functions we need, so we will - wrap what we need in a simple class. 
- """ - def __init__(self, comm, filename, mode): - self.fh = MPI.File.Open(comm, filename, mode) - self.fh.Set_atomicity(True) - - def write(self,buf): - self.fh.Write_shared(buf) - - def flush(self): - self.fh.Sync() - - def close(self): - self.fh.Close() - -class MPIFileHandler(logging.FileHandler): - """ - A handler class which writes formatted logging records to disk files. - """ - def __init__(self, filename, mode = MPI.MODE_WRONLY|MPI.MODE_CREATE, delay = 0, comm = MPI.COMM_WORLD): - """ - Open the specified file and use it as the stream for logging. - """ - self.baseFilename = os.path.abspath(filename) - self.mode = mode - self.encoding = None - self.delay = delay - self.comm = comm - if delay: - logging.Handler.__init__(self) - self.stream = None - else: - logging.StreamHandler.__init__(self, self._open()) - - def _open(self): - """ - Open the current base file with the (original) mode and encoding. - Return the resulting stream. - """ - stream = MPIStream(self.comm, self.baseFilename, self.mode ) - return stream - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('--model_root', dest="model_root", metavar='root', type=str, nargs=1, default=["./"], - help='root at which to search for models') -parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, - help='path to configuration file to use') -parser.add_argument('--models', dest="models", metavar='m', type=str, nargs='+',default=[], - help='specify which models to run, list model names with no quotes and only separated by a space.') -parser.add_argument('--model_year', dest="model_year", metavar='y0 yf', type=int, nargs='+',default=[], - help='set to shift model years, "--model_year y0 yf" will shift years from y0 to yf') -parser.add_argument('--confrontations', dest="confront", metavar='c', type=str, nargs='+',default=[], - help='specify which confrontations to run, list confrontation names with no quotes and only separated by a space.') -parser.add_argument('--regions', dest="regions", metavar='r', type=str, nargs='+',default=['global'], - help='specify which regions to compute over') -parser.add_argument('--clean', dest="clean", action="store_true", - help='enable to remove analysis files and recompute') -parser.add_argument('--disable_logging', dest="logging", action="store_false", - help='disables logging') -parser.add_argument('-q','--quiet', dest="quiet", action="store_true", - help='enable to silence screen output') -parser.add_argument('--filter', dest="filter", metavar='filter', type=str, nargs=1, default=[""], - help='a string which much be in the model filenames') -parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], - help='path of where to save the output') -parser.add_argument('--define_regions', dest="define_regions", type=str, nargs='+',default=[], - help='list files containing user-defined regions') -parser.add_argument('--model_setup', dest="model_setup", type=str, nargs='+',default=None, - help='list files model setup information') -parser.add_argument('--skip_plots', dest="skip_plots", action="store_true", - help='enable to skip the plotting phase') -parser.add_argument('--rel_only', dest="rel_only", action="store_true", - help='enable only display relative differences in overall scores') -args = parser.parse_args() -if args.config is None: - if rank == 0: - print "\nError: You must specify a configuration file using the option --config\n" - comm.Barrier() - comm.Abort(1) - -# Setup regions -r = 
Regions() -InitializeRegions(args.define_regions) -missing = [] -for region in args.regions: - if region not in r.regions: missing.append(region) -if len(missing) > 0: - raise ValueError("Unable to find the following regions %s from the following list of possible regions %s" % (missing,r.regions)) - -# Setup study -T0 = time.time() -if args.model_setup is None: - M = InitializeModels(args.model_root[0],args.models,not args.quiet,filter=args.filter[0],model_year=args.model_year) -else: - M = ParseModelSetup(args.model_setup[0],args.models,not args.quiet,filter=args.filter[0]) -if rank == 0 and not args.quiet: print "\nParsing config file %s...\n" % args.config[0] -S = Scoreboard(args.config[0], - regions = args.regions, - master = rank==0, - verbose = not args.quiet, - build_dir = args.build_dir[0], - extents = RestrictiveModelExtents(M), - rel_only = args.rel_only) -C = MatchRelationshipConfrontation(S.list()) -Cf = FilterConfrontationList(C,args.confront) - -# Setup logging -logger = logging.getLogger("%i" % comm.rank) -formatter = logging.Formatter('[%(levelname)s][%(name)s][%(funcName)s]%(message)s') -logger.setLevel(logging.DEBUG) -if args.logging: - mh = MPIFileHandler('%s/ILAMB%02d.log' % (S.build_dir,len(glob.glob("%s/*.log" % S.build_dir))+1)) - mh.setFormatter(formatter) - logger.addHandler(mh) - -if rank == 0: - logger.info(" " + " ".join(os.uname())) - for key in ["ILAMB","numpy","matplotlib","netCDF4","cf_units","sympy","mpi4py"]: - pkg = __import__(key) - try: - path = pkg.__path__[0] - except: - path = key - logger.info(" %s (%s)" % (path,pkg.__version__)) - logger.info(" %s" % datetime.datetime.now()) - -if rank == 0 and not args.quiet and len(Cf) != len(C): - print "\nWe filtered some confrontations, actually running...\n" - for c in Cf: print (" {0:>45}").format(c.longname) -C = Cf - -sys.stdout.flush(); comm.Barrier() - -if rank==0 and not args.quiet: print "\nRunning model-confrontation pairs...\n" - -sys.stdout.flush(); comm.Barrier() - -W = BuildLocalWorkList(M,C) -WorkConfront(W,not args.quiet,args.clean) - -sys.stdout.flush(); comm.Barrier() - -if rank==0 and not args.quiet: print "\nFinishing post-processing which requires collectives...\n" - -sys.stdout.flush(); comm.Barrier() - -WorkPost(M,C,W,S,not args.quiet,args.skip_plots) - -sys.stdout.flush(); comm.Barrier() - -# Runtime information -proc_reduced = np.zeros(proc.shape) -comm.Reduce(proc,proc_reduced,root=0) -if size > 1: logger.info("[process time] %.1f s" % proc[rank]) -if rank==0: - logger.info("[total time] %.1f s" % (time.time()-T0)) - if size > 1: - if proc_reduced.min() > 1e-6: - logger.info("[process balance] %.2f" % (proc_reduced.max()/proc_reduced.min())) - else: - logger.info("[process balance] nan") - logger.info("[parallel efficiency] %.0f%%" % (100.*proc_reduced.sum()/float(size)/(time.time()-T0))) - -if rank==0: S.dumpScores(M,"scores.csv") - -if rank==0 and not args.quiet: print "\nCompleted in {0:>5.1f} s\n".format(time.time()-T0) diff --git a/ilamb/ilamb/bin/ilamb-table b/ilamb/ilamb/bin/ilamb-table deleted file mode 100644 index 374b6abb..00000000 --- a/ilamb/ilamb/bin/ilamb-table +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python -""" -""" -from ILAMB.Scoreboard import Scoreboard -from netCDF4 import Dataset -import os,argparse,sys - -parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, - help='path to configuration file to use') -parser.add_argument('--build_dir', dest="build_dir", 
metavar='build_dir', type=str, nargs=1,default=["./_build"], - help='path of where to save the output') -parser.add_argument('--csv_file', dest="csv", metavar='csv', type=str, nargs=1,default=["table.csv"], - help='destination filename for the table') - -args = parser.parse_args() -if args.config is None: - print "\nError: You must specify a configuration file using the option --config\n" - sys.exit(1) - -S = Scoreboard(args.config[0],verbose=False,build_dir=args.build_dir[0]) - -region = "global" -scalar = "RMSE" -sname = "%s %s" % (scalar,region) -group = "MeanState" -table = {} -unit = {} -for c in S.list(): - for subdir, dirs, files in os.walk(c.output_path): - for fname in files: - if not fname.endswith(".nc"): continue - with Dataset(os.path.join(c.output_path,fname)) as dset: - if group not in dset.groups .keys(): continue - if "scalars" not in dset.groups[group].groups.keys(): continue - grp = dset.groups[group]["scalars"] - if sname not in grp.variables.keys(): continue - var = grp.variables[sname] - if not table.has_key(c.longname): - table[c.longname] = {} - unit [c.longname] = var.units - table[c.longname][dset.name] = var[...] - -# What models have data? -models = [] -for key in table.keys(): - for m in table[key].keys(): - if m not in models: models.append(m) -models.sort() - -# render a table of values in csv format -lines = ",".join(["Name","Units"] + models) -for c in S.list(): - if not table.has_key(c.longname): continue - line = "%s,%s" % (c.longname,unit[c.longname]) - for m in models: - if table[c.longname].has_key(m): - line += ",%g" % (table[c.longname][m]) - else: - line += "," - lines += "\n%s" % line - -with file(args.csv[0],mode="w") as f: - f.write(lines) - diff --git a/ilamb/ilamb/demo/diurnal.cfg b/ilamb/ilamb/demo/diurnal.cfg deleted file mode 100644 index 6bc6833b..00000000 --- a/ilamb/ilamb/demo/diurnal.cfg +++ /dev/null @@ -1,76 +0,0 @@ - -#[h1: Ecosystem and Carbon Cycle] -#bgcolor = "#ECFFE6" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -#[h2: Gross Primary Productivity] -#variable = "gpp" - -#[Ameriflux] -#source = "DATA/Ameriflux/gpp_ameriflux_hh.nc" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -#[h2: Net Ecosystem Exchange] -#variable = "nee" -#derived = "gpp-ra-rh" - -#[Ameriflux] -#source = "DATA/Ameriflux/nee_ameriflux_hh.nc" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -#[h2: Ecosystem Respiration] -#variable = "reco" -#derived = "ra+rh" - -#[Ameriflux] -#source = "DATA/Ameriflux/reco_ameriflux_hh.nc" - -########################################################################### - -[h1: Hydrology Cycle] -bgcolor = "#E6F9FF" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Latent Heat] -variable = "hfls" -ctype = ConfDiurnal - -[AmerifluxHR] -source = "DATA/Ameriflux/hfls_ameriflux_hr.nc" - -[AmerifluxHH] -source = "DATA/Ameriflux/hfls_ameriflux_hh.nc" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Sensible Heat] -variable = "hfss" -ctype = ConfDiurnal - -[AmerifluxHR] -source = "DATA/Ameriflux/hfss_ameriflux_hr.nc" - -[AmerifluxHH] -source = "DATA/Ameriflux/hfss_ameriflux_hh.nc" - -########################################################################### - -[h1: Forcings] -bgcolor = "#EDEDED" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Air Temperature] -variable = "tas" -ctype = ConfDiurnal - -[AmerifluxHR] 
-source = "DATA/Ameriflux/tas_ameriflux_hr.nc" - -[AmerifluxHH] -source = "DATA/Ameriflux/tas_ameriflux_hh.nc" - diff --git a/ilamb/ilamb/demo/ilamb.cfg b/ilamb/ilamb/demo/ilamb.cfg deleted file mode 100644 index 90bb7a68..00000000 --- a/ilamb/ilamb/demo/ilamb.cfg +++ /dev/null @@ -1,544 +0,0 @@ -# This configure file uses observational data which can be obtained by -# running the following command after exporting ILAMB_ROOT to the -# appropriate location. -# -# ilamb-fetch --remote_root http://ilamb.ornl.gov/ILAMB-Data -# -[h1: Ecosystem and Carbon Cycle] -bgcolor = "#ECFFE6" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Biomass] -variable = "biomass" -alternate_vars = "cVeg" -weight = 5 -skip_rmse = True -mass_weighting = True - -[GEOCARBON] -source = "DATA/biomass/GEOCARBON/biomass_0.5x0.5.nc" -weight = 20 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False - -[GlobalCarbon] -source = "DATA/biomass/GLOBAL.CARBON/biomass_0.5x0.5.nc" -weight = 16 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False - -[NBCD2000] -source = "DATA/biomass/NBCD2000/biomass_0.5x0.5.nc" -weight = 8 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False - -[USForest] -source = "DATA/biomass/US.FOREST/biomass_0.5x0.5.nc" -weight = 8 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Burned Area] -variable = "burntArea" -weight = 4 -cmap = "OrRd" -mass_weighting = True - -[GFED4S] -source = "DATA/burntArea/GFED4S/burntArea_0.5x0.5.nc" -weight = 20 -relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Gross Primary Productivity] -variable = "gpp" -cmap = "Greens" -weight = 5 -mass_weighting = True - -[Fluxnet] -source = "DATA/gpp/FLUXNET/gpp.nc" -weight = 9 -table_unit = "g m-2 d-1" -plot_unit = "g m-2 d-1" - -[GBAF] -source = "DATA/gpp/GBAF/gpp_0.5x0.5.nc" -weight = 15 -table_unit = "Pg yr-1" -plot_unit = "g m-2 d-1" -space_mean = False -skip_iav = True -relationships = "Evapotranspiration/GLEAM","Precipitation/GPCP2","SurfaceDownwardSWRadiation/CERES","SurfaceNetSWRadiation/CERES","SurfaceAirTemperature/CRU" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Leaf Area Index] -variable = "lai" -cmap = "Greens" -weight = 3 -mass_weighting = True - -[AVHRR] -source = "DATA/lai/AVHRR/lai_0.5x0.5.nc" -weight = 15 -relationships = "Precipitation/GPCP2" - -[MODIS] -source = "DATA/lai/MODIS/lai_0.5x0.5.nc" -weight = 15 -relationships = "Precipitation/GPCP2" -skip_iav = True - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Global Net Ecosystem Carbon Balance] -variable = "nbp" -weight = 5 -ctype = "ConfNBP" - -[GCP] -source = "DATA/nbp/GCP/nbp_1959-2012.nc" -weight = 20 - -[Hoffman] -source = "DATA/nbp/HOFFMAN/nbp_1850-2010.nc" -weight = 20 -skip_taylor = True - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Net Ecosystem Exchange] -variable = "nee" -derived = "gpp-ra-rh" -weight = 5 -mass_weighting = True - -[Fluxnet] -source = "DATA/nee/FLUXNET/nee.nc" -weight = 9 -table_unit = "g m-2 d-1" -plot_unit = "g m-2 d-1" - -[GBAF] -source = "DATA/nee/GBAF/nee_0.5x0.5.nc" -weight = 4 -table_unit = "Pg yr-1" -plot_unit = "g m-2 d-1" -space_mean = False -skip_iav = True - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: 
Ecosystem Respiration] -variable = "reco" -derived = "ra+rh" -weight = 4 -mass_weighting = True - -[Fluxnet] -source = "DATA/reco/FLUXNET/reco.nc" -weight = 6 -table_unit = "g m-2 d-1" -plot_unit = "g m-2 d-1" - -[GBAF] -source = "DATA/reco/GBAF/reco_0.5x0.5.nc" -weight = 4 -table_unit = "Pg yr-1" -plot_unit = "g m-2 d-1" -space_mean = False -skip_iav = True - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Soil Carbon] -variable = "cSoil" -alternate_vars = "soilc" -weight = 5 -mass_weighting = True - -[HWSD] -source = "DATA/soilc/HWSD/soilc_0.5x0.5.nc" -weight = 15 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False -skip_rmse = True - -[NCSCDV22] -source = "DATA/soilc/NCSCDV22/soilc_0.5x0.5.nc" -weight = 12 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False -skip_rmse = True - -########################################################################### - -[h1: Hydrology Cycle] -bgcolor = "#E6F9FF" - -[h2: Evapotranspiration] -variable = "et" -alternate_vars = "evspsbl" -cmap = "Blues" -weight = 5 -mass_weighting = True - -[GLEAM] -source = "DATA/et/GLEAM/et_0.5x0.5.nc" -weight = 15 -table_unit = "mm d-1" -plot_unit = "mm d-1" -relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" - -[MODIS] -source = "DATA/et/MODIS/et_0.5x0.5.nc" -weight = 15 -table_unit = "mm d-1" -plot_unit = "mm d-1" -relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Evaporative Fraction] -variable = "EvapFrac" -weight = 5 -mass_weighting = True -ctype = "ConfEvapFraction" - -[GBAF] -source = "DATA/EvapFrac/GBAF/EvapFrac_0.5x0.5.nc" -weight = 9 -skip_rmse = True -skip_iav = True -limit_type = "99per" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Latent Heat] -variable = "hfls" -alternate_vars = "le" -cmap = "Oranges" -weight = 5 -mass_weighting = True - -[Fluxnet] -source = "DATA/le/FLUXNET/le.nc" -weight = 3 - -[GBAF] -source = "DATA/le/GBAF/le_0.5x0.5.nc" -land = True -weight = 9 -skip_iav = True - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Runoff] -variable = "runoff" -alternate_vars = "mrro" -weight = 5 - -[Dai] -ctype = "ConfRunoff" -source = "DATA/runoff/Dai/runoff.nc" -weight = 15 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Sensible Heat] -variable = "hfss" -alternate_vars = "sh" -weight = 2 -mass_weighting = True - -[Fluxnet] -source = "DATA/sh/FLUXNET/sh.nc" -weight = 9 - -[GBAF] -source = "DATA/sh/GBAF/sh_0.5x0.5.nc" -weight = 15 -skip_iav = True - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Terrestrial Water Storage Anomaly] -variable = "twsa" -alternate_vars = "tws" -derived = "pr-evspsbl-mrro" -cmap = "Blues" -weight = 5 -ctype = "ConfTWSA" - -[GRACE] -source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" -weight = 25 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Snow Water Equivalent] -variable = "swe" -alternate_vars = "snw" -cmap = "Blues" -weight = 5 - -[CanSISE] -source = "DATA/swe/CanSISE/swe.nc" -weight = 25 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Permafrost] -variable = "tsl" - -[NSIDC] -ctype = "ConfPermafrost" -source = "DATA/permafrost/NSIDC/NSIDC_0.5x0.5.nc" -y0 = 1970. -yf = 2000. 
-Teps = 273.15 -dmax = 3.5 - -########################################################################### - -[h1: Radiation and Energy Cycle] -bgcolor = "#FFECE6" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Albedo] -variable = "albedo" -derived = "rsus/rsds" -weight = 1 - -[CERES] -source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" -weight = 20 - -[GEWEX.SRB] -source = "DATA/albedo/GEWEX.SRB/albedo_0.5x0.5.nc" -weight = 20 - -[MODIS] -source = "DATA/albedo/MODIS/albedo_0.5x0.5.nc" -weight = 20 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Upward SW Radiation] -variable = "rsus" -weight = 1 - -[CERES] -source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" -weight = 15 - -[GEWEX.SRB] -source = "DATA/rsus/GEWEX.SRB/rsus_0.5x0.5.nc" -weight = 15 - -[WRMC.BSRN] -source = "DATA/rsus/WRMC.BSRN/rsus.nc" -weight = 12 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Net SW Radiation] -variable = "rsns" -derived = "rsds-rsus" -weight = 1 - -[CERES] -source = "DATA/rsns/CERES/rsns_0.5x0.5.nc" -weight = 15 - -[GEWEX.SRB] -source = "DATA/rsns/GEWEX.SRB/rsns_0.5x0.5.nc" -weight = 15 - -[WRMC.BSRN] -source = "DATA/rsns/WRMC.BSRN/rsns.nc" -weight = 12 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Upward LW Radiation] -variable = "rlus" -weight = 1 - -[CERES] -source = "DATA/rlus/CERES/rlus_0.5x0.5.nc" -weight = 15 - -[GEWEX.SRB] -source = "DATA/rlus/GEWEX.SRB/rlus_0.5x0.5.nc" -weight = 15 - -[WRMC.BSRN] -source = "DATA/rlus/WRMC.BSRN/rlus.nc" -weight = 12 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Net LW Radiation] -variable = "rlns" -derived = "rlds-rlus" -weight = 1 - -[CERES] -source = "DATA/rlns/CERES/rlns_0.5x0.5.nc" -weight = 15 - -[GEWEX.SRB] -source = "DATA/rlns/GEWEX.SRB/rlns_0.5x0.5.nc" -weight = 15 - -[WRMC.BSRN] -source = "DATA/rlns/WRMC.BSRN/rlns.nc" -weight = 12 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Net Radiation] -variable = "rns" -derived = "rlds-rlus+rsds-rsus" -weight = 2 - -[CERES] -source = "DATA/rns/CERES/rns_0.5x0.5.nc" -weight = 15 - -[Fluxnet] -source = "DATA/rns/FLUXNET/rns.nc" -weight = 12 - -[GEWEX.SRB] -source = "DATA/rns/GEWEX.SRB/rns_0.5x0.5.nc" -weight = 15 - -[WRMC.BSRN] -source = "DATA/rns/WRMC.BSRN/rns.nc" -weight = 12 - -########################################################################### - -[h1: Forcings] -bgcolor = "#EDEDED" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Air Temperature] -variable = "tas" -weight = 2 - -[CRU] -source = "DATA/tas/CRU/tas_0.5x0.5.nc" -weight = 25 - -[Fluxnet] -source = "DATA/tas/FLUXNET/tas.nc" -weight = 9 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Precipitation] -variable = "pr" -cmap = "Blues" -weight = 2 -mass_weighting = True - -[CMAP] -source = "DATA/pr/CMAP/pr_0.5x0.5.nc" -land = True -weight = 20 -table_unit = "mm d-1" -plot_unit = "mm d-1" -space_mean = True - -[Fluxnet] -source = "DATA/pr/FLUXNET/pr.nc" -land = True -weight = 9 -table_unit = "mm d-1" -plot_unit = "mm d-1" - -[GPCC] -source = "DATA/pr/GPCC/pr_0.5x0.5.nc" -land = True -weight = 20 -table_unit = "mm d-1" -plot_unit = "mm d-1" -space_mean = True - -[GPCP2] -source = "DATA/pr/GPCP2/pr_0.5x0.5.nc" -land = True -weight = 20 -table_unit = "mm d-1" -plot_unit = "mm d-1" -space_mean = True - 
-#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Relative Humidity] -variable = "rhums" -alternate_vars = "hurs" -cmap = "Blues" -weight = 3 -mass_weighting = True - -[ERA] -source = "DATA/rhums/ERA/rhums_0.5x0.5.nc" -weight = 10 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Downward SW Radiation] -variable = "rsds" -weight = 2 - -[CERES] -source = "DATA/rsds/CERES/rsds_0.5x0.5.nc" -weight = 15 - -[Fluxnet] -source = "DATA/rsds/FLUXNET/rsds.nc" -weight = 12 - -[GEWEX.SRB] -source = "DATA/rsds/GEWEX.SRB/rsds_0.5x0.5.nc" -weight = 15 - -[WRMC.BSRN] -source = "DATA/rsds/WRMC.BSRN/rsds.nc" -weight = 12 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Downward LW Radiation] -variable = "rlds" -weight = 1 - -[CERES] -source = "DATA/rlds/CERES/rlds_0.5x0.5.nc" -weight = 15 - -[GEWEX.SRB] -source = "DATA/rlds/GEWEX.SRB/rlds_0.5x0.5.nc" -weight = 15 - -[WRMC.BSRN] -source = "DATA/rlds/WRMC.BSRN/rlds.nc" -weight = 12 diff --git a/ilamb/ilamb/demo/iomb.cfg b/ilamb/ilamb/demo/iomb.cfg deleted file mode 100644 index a534397b..00000000 --- a/ilamb/ilamb/demo/iomb.cfg +++ /dev/null @@ -1,65 +0,0 @@ -# This configure file uses observational data which can be obtained by -# running the following command after exporting ILAMB_ROOT to the -# appropriate location. -# -# ilamb-fetch --remote_root http://ilamb.ornl.gov/IOMB-Data -# -[h1: Marine Chemistry] -bgcolor = "#ECFFE6" - -[h2: Nitrate] -variable = "no3" -alternate_vars = "NO3" -cmap = "PuBu" -ctype = "ConfIOMB" - -[WOA] -source = "DATA/nitrate/WOA/nitrate.nc" - -[h2: Phosphate] -variable = "po4" -alternate_vars = "PO4" -cmap = "Oranges" -ctype = "ConfIOMB" - -[WOA] -source = "DATA/phosphate/WOA/phosphate.nc" - -[h2: Silicate] -variable = "si" -alternate_vars = "SiO3" -cmap = "BuPu" -ctype = "ConfIOMB" - -[WOA] -source = "DATA/silicate/WOA/silicate.nc" - -[h1: Physical Quantities] -bgcolor = "#FFECE6" - -[h2: Salinity] -variable = "so" -alternate_vars = "SALT" -cmap = "GnBu" -ctype = "ConfIOMB" - -[WOA] -source = "DATA/salinity/WOA/salinity.nc" - -[h2: Oxygen] -variable = "o2" -alternate_vars = "O2" -cmap = "GnBu" -ctype = "ConfIOMB" - -[WOA] -source = "DATA/oxygen/WOA/oxygen.nc" - -[h2: Temperature] -variable = "thetao" -alternate_vars = "TEMP" -cmap = "rainbow" -ctype = "ConfIOMB" - -[WOA] -source = "DATA/temperature/WOA/temperature.nc" diff --git a/ilamb/ilamb/demo/sample.cfg b/ilamb/ilamb/demo/sample.cfg deleted file mode 100644 index 0e3abaa9..00000000 --- a/ilamb/ilamb/demo/sample.cfg +++ /dev/null @@ -1,18 +0,0 @@ -# This configure file specifies the variables - -[h1: Radiation and Energy Cycle] -bgcolor = "#FFECE6" - -[h2: Surface Upward SW Radiation] -variable = "rsus" - -[CERES] -source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" - -[h2: Albedo] -variable = "albedo" -derived = "rsus/rsds" - -[CERES] -source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" - diff --git a/ilamb/ilamb/doc/Makefile b/ilamb/ilamb/doc/Makefile deleted file mode 100644 index 5fff5947..00000000 --- a/ilamb/ilamb/doc/Makefile +++ /dev/null @@ -1,186 +0,0 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) -$(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) -endif - -# Internal variables. -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . - -.PHONY: ilamb help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext - -help: - @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " dirhtml to make HTML files named index.html in directories" - @echo " singlehtml to make a single large HTML file" - @echo " pickle to make pickle files" - @echo " json to make JSON files" - @echo " htmlhelp to make HTML files and a HTML help project" - @echo " qthelp to make HTML files and a qthelp project" - @echo " devhelp to make HTML files and a Devhelp project" - @echo " epub to make an epub" - @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" - @echo " latexpdf to make LaTeX files and run them through pdflatex" - @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" - @echo " text to make text files" - @echo " man to make manual pages" - @echo " texinfo to make Texinfo files" - @echo " info to make Texinfo files and run them through makeinfo" - @echo " gettext to make PO message catalogs" - @echo " changes to make an overview of all changed/added/deprecated items" - @echo " xml to make Docutils-native XML files" - @echo " pseudoxml to make pseudoxml-XML files for display purposes" - @echo " linkcheck to check all external links for integrity" - @echo " doctest to run all doctests embedded in the documentation (if enabled)" - @echo " coverage to run coverage check of the documentation (if enabled)" - -ilamb: - pushd ../; python setup.py install --user; popd - -clean: - rm -rf $(BUILDDIR)/* _generated - -html: ilamb - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -dirhtml: - $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." - -singlehtml: - $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml - @echo - @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." - -pickle: - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle - @echo - @echo "Build finished; now you can process the pickle files." - -json: - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json - @echo - @echo "Build finished; now you can process the JSON files." - -htmlhelp: - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp - @echo - @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in $(BUILDDIR)/htmlhelp." 
- -qthelp: - $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp - @echo - @echo "Build finished; now you can run "qcollectiongenerator" with the" \ - ".qhcp project file in $(BUILDDIR)/qthelp, like this:" - @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TestPkg.qhcp" - @echo "To view the help file:" - @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TestPkg.qhc" - -devhelp: - $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp - @echo - @echo "Build finished." - @echo "To view the help file:" - @echo "# mkdir -p $$HOME/.local/share/devhelp/TestPkg" - @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/TestPkg" - @echo "# devhelp" - -epub: - $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub - @echo - @echo "Build finished. The epub file is in $(BUILDDIR)/epub." - -latex: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo - @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." - @echo "Run \`make' in that directory to run these through (pdf)latex" \ - "(use \`make latexpdf' here to do that automatically)." - -latexpdf: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through pdflatex..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -latexpdfja: - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex - @echo "Running LaTeX files through platex and dvipdfmx..." - $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja - @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." - -text: - $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text - @echo - @echo "Build finished. The text files are in $(BUILDDIR)/text." - -man: - $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man - @echo - @echo "Build finished. The manual pages are in $(BUILDDIR)/man." - -texinfo: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo - @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." - @echo "Run \`make' in that directory to run these through makeinfo" \ - "(use \`make info' here to do that automatically)." - -info: - $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo - @echo "Running Texinfo files through makeinfo..." - make -C $(BUILDDIR)/texinfo info - @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." - -gettext: - $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale - @echo - @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." - -changes: - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes - @echo - @echo "The overview file is in $(BUILDDIR)/changes." - -linkcheck: - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -doctest: - $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest - @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." - -coverage: - $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage - @echo "Testing of coverage in the sources finished, look at the " \ - "results in $(BUILDDIR)/coverage/python.txt." - -xml: - $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml - @echo - @echo "Build finished. The XML files are in $(BUILDDIR)/xml." - -pseudoxml: - $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml - @echo - @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/ilamb/ilamb/doc/_templates/class.rst b/ilamb/ilamb/doc/_templates/class.rst deleted file mode 100644 index 714edba7..00000000 --- a/ilamb/ilamb/doc/_templates/class.rst +++ /dev/null @@ -1,25 +0,0 @@ -{% extends "!autosummary/class.rst" %} - -{% block methods %} -{% if methods %} - .. autosummary:: - :toctree: - {% for item in all_methods %} - {%- if not item.startswith('_') %} - {{ name }}.{{ item }} - {%- endif -%} - {%- endfor %} -{% endif %} -{% endblock %} - -{% block attributes %} -{% if attributes %} - .. autosummary:: - :toctree: - {% for item in all_attributes %} - {%- if not item.startswith('_') %} - {{ name }}.{{ item }} - {%- endif -%} - {%- endfor %} -{% endif %} -{% endblock %} diff --git a/ilamb/ilamb/doc/add_data.rst b/ilamb/ilamb/doc/add_data.rst deleted file mode 100644 index f1f66c15..00000000 --- a/ilamb/ilamb/doc/add_data.rst +++ /dev/null @@ -1,150 +0,0 @@ -Adding a Benchmark Dataset -========================== - -The following tutorial builds on the *First Steps* `tutorial -<./first_steps.html>`_ by describing how additional datasets may be -added to our sample benchmark comparison. We will add a Surface Upward -Shortwave Radiation dataset from the the central archive of the -Baseline Surface Radiation Network (BSRN) on the World Radiation -Monitoring Center (WRMC). We have provided a file in the appropriate -format `here `_. We -suggest that you create a directory inside the ``rsus`` directory -called ``WRMC.BSRN`` and place the downloaded file inside. We will -show the appropriate part of the tree here:: - - DATA/ - ├── albedo - │ └── CERES - │ └── albedo_0.5x0.5.nc - └── rsus - ├── CERES - │ └── rsus_0.5x0.5.nc - └── WRMC.BSRN - └── rsus.nc - -To add this dataset to our benchmarks, we only need to add a new line -to ``sample.cfg`` under the ``h2`` heading which corresponds to -Surface Upward Shortwave Radiation. Here we show only the portion of -the configure file which pertains to this variable with the new -dataset addition:: - - [h2: Surface Upward SW Radiation] - variable = "rsus" - - [CERES] - source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" - - [WRMC.BSRN] - source = "DATA/rsus/WRMC.BSRN/rsus.nc" - -Now if we execute the ``ilamb-run`` script as before:: - - ilamb-run --config sample.cfg --model_root $ILAMB_ROOT/MODELS/ --regions global - -we will see the following output to the screen:: - - Searching for model results in /home/ncf/sandbox/ILAMB_sample//MODELS/ - - CLM40cn - - Parsing config file sample.cfg... - - SurfaceUpwardSWRadiation/CERES Initialized - SurfaceUpwardSWRadiation/WRMC.BSRN Initialized - Albedo/CERES Initialized - - Running model-confrontation pairs... - - SurfaceUpwardSWRadiation/CERES CLM40cn UsingCachedData - SurfaceUpwardSWRadiation/WRMC.BSRN CLM40cn Completed 1.0 s - Albedo/CERES CLM40cn UsingCachedData - - Finishing post-processing which requires collectives... - - SurfaceUpwardSWRadiation/CERES CLM40cn Completed 6.4 s - SurfaceUpwardSWRadiation/WRMC.BSRN CLM40cn Completed 6.3 s - Albedo/CERES CLM40cn Completed 6.8 s - - Completed in 29.0 s - -You will notice that on running the script again, we did not have to -perform the analysis step for the confrontations we ran -previously. When a model-confrontation pair is run, we save the -analysis information in a netCDF4 file. If this file is detected in -the setup process, then we will use the results from the file and skip -the analysis step. The plotting, however, is repeated. 
- -You will also notice that the new ``rsus`` dataset we added ran much -more quickly than the CERES dataset. This is because the new dataset -is only defined at 55 specific sites as opposed to the whole globe at -half degree resolution. Despite the difference in these datasets, the -interface into the system (that is, the configuration file entry) is -the same. This represents an element of our design philosophy--the -benchmark datasets should contain sufficient information so that the -appropriate commensurate information from the model may be -extracted. When we open the ``WRMC.BSRN`` dataset, we detect that the -desired variable is defined over datasites. From this we can then -automatically sample the model results, extracting information from -the appropriate gridcells. - -Weighting Datasets ------------------- - -To view the results of the new dataset, look inside the ``_build`` -directory and open a file called ``index.html`` in your favorite web -browser. You should see a webpage entitled *ILAMB Benchmark Results* -and a series of three tabs, the middle of which is entitled *Results -Table*. If you click on the row of the table which bears the name -*Surface Upward SW Radiation* you will see that the row expands to -reveal how individual datasets contributed to the overall score for -this variable. Here we reproduce this portion of the table. - -=========================== ======= -Dataset CLM40cn -=========================== ======= -Surface Upward SW Radiation 0.77 - CERES (50.0%) 0.79 - WRMC.BSRN (50.0%) 0.74 -=========================== ======= - -The values you get for scores may vary from this table as our scoring -methodology is in flux as we develop and hone it. The main point here -is that we have weighted each dataset equally, as seen in the -percentages listed after each dataset name. While this is a reasonable -default, it is unlikely as you add datasets that you will have equal -confidence in their quality. To address this, we provide you with a -method of weighting datasets in the configuration file. For the sake -of demonstration, let us assume that we are five times as confident in -the CERES data. This we can express by modifying the relevant section -of the configuration file:: - - [h2: Surface Upward SW Radiation] - variable = "rsus" - - [CERES] - source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" - weight = 5 - - [WRMC.BSRN] - source = "DATA/rsus/WRMC.BSRN/rsus.nc" - weight = 1 - -and then running the script as before. This will run quickly as we do -not require a reanalysis for a mere change of weights. Once the run is -complete, open again or reload ``_build/index.html`` and navigate to -the same section of the results table. You should see the change in -weight reflected in the percentages as well as in the overall score -for the variable. - -=========================== ======= -Dataset CLM40cn -=========================== ======= -Surface Upward SW Radiation 0.78 - CERES (83.3%) 0.79 - WRMC.BSRN (16.7%) 0.74 -=========================== ======= - -You may notice that if you apply the weighting by hand based on the -output printed in the table, that you appear to get a different -result. This is because the HTML table output is rounded for display -purposes, but the scores are computed and weighted in full precision. 
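As a rough check, the weighted aggregation can be reproduced by hand from the rounded values displayed in the table above (only a sketch for intuition; the actual overall score is computed from the unrounded dataset scores, so the last digit may differ)::

    # approximate the overall variable score from the rounded table values
    weights = {"CERES": 5.0, "WRMC.BSRN": 1.0}
    scores  = {"CERES": 0.79, "WRMC.BSRN": 0.74}
    overall = sum(weights[k] * scores[k] for k in scores) / sum(weights.values())
    print(round(overall, 2))  # ~0.78
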
diff --git a/ilamb/ilamb/doc/add_model.rst b/ilamb/ilamb/doc/add_model.rst deleted file mode 100644 index e3bd2b10..00000000 --- a/ilamb/ilamb/doc/add_model.rst +++ /dev/null @@ -1,117 +0,0 @@ -Adding a Model -============== - -The following tutorial builds on the `previous <./first_steps.html>`_ -by describing how another model's results may be added to the -benchmarking results for CLM40cn. At this point, we suggest that you -try to incoporate model result data of your own. At a minimum you will -need to have the ``rsus`` and ``rsds`` variables expressed as monthly -mean values at least partially over the time period of the source data -(2000-2012). In the event you do not have model data of your own, this -tutorial will copy the CLM40cn results data and treat it as another -model as a demonstration only. - -The main concept you need to understand is how ``ilamb-run`` finds and -classifies model results. When executing ``ilamb-run`` in the previous -tutorial, we specified an option ``--model_root -$ILAMB_ROOT/MODELS/``. This tells the script where to look for model -results. The script will consider each subdirectory of the specified -directory as a separate model result. So for example, if we copy the -CLM40cn results into a new directory represented in the following -tree:: - - ./MODELS - ├── CLM40cn - │ ├── rsds - │ │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - │ └── rsus - │ └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - └── CLMCopy - ├── rsds - │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - └── rsus - └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - -Then when we execute the same ``ilamb-run`` command as before:: - - ilamb-run --config sample.cfg --model_root $ILAMB_ROOT/MODELS/ --regions global - -We observe that the new model is indeed found and the confrontations -are run. Here we reproduce the screen output:: - - Searching for model results in /home/ncf/sandbox/ILAMB_sample/MODELS/ - - CLM40cn - CLMCopy - - Parsing config file sample.cfg... - - SurfaceUpwardSWRadiation/CERES Initialized - Albedo/CERES Initialized - - Running model-confrontation pairs... - - SurfaceUpwardSWRadiation/CERES CLM40cn UsingCachedData - SurfaceUpwardSWRadiation/CERES CLMCopy Completed 38.4 s - Albedo/CERES CLM40cn UsingCachedData - Albedo/CERES CLMCopy Completed 39.9 s - - Finishing post-processing which requires collectives... - - SurfaceUpwardSWRadiation/CERES CLM40cn Completed 3.8 s - SurfaceUpwardSWRadiation/CERES CLMCopy Completed 3.0 s - Albedo/CERES CLM40cn Completed 3.9 s - Albedo/CERES CLMCopy Completed 3.8 s - - Completed in 92.8 s - -You will notice that on executing the run script again, we did not have to -perform the analysis step for the model we ran in the previous -tutorial. When a model-confrontation pair is run, we save the analysis -information in a netCDF4 file. If this file is detected in the setup -process, then we will use the results from the file and skip the -analysis step. The plotting, however, is repeated. This is because -adding extra models will possible change the limits on the plots and -thus must be rendered again. - -You have a great deal of flexibility as to how results are saved. That -is, they need not exist in separate files within subdirectories -bearing the name of the variable which they represent. 
We could, for -example, move the sample data around in the following way:: - - ./MODELS - ├── CLM40cn - │ ├── rsds - │ │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - │ └── rsus - │ └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - └── CLMCopy - └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - -and the run script will interpret the model in exactly the same -manner. The variables can even be in the same file or across multiple -files representing different sections of the simulation time. We will -detect which variables are in which files, and combine them -automatically. The only real requirement is that all the files be -located under a directory bearing the model's name. This directory -could even be a symbolic link. On my personal machine, I have data -from a CLM45bgc run saved. So I can create a symbolic link from my -``MODELS`` directory to the location on my local machine:: - - ./MODELS - ├── CLM40cn - │ ├── rsds - │ │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - │ └── rsus - │ └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - ├── CLM45bgc -> /work/ILAMB/MODELS/CLM/CLM45bgc/ - └── CLMCopy - └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - -and the run script will follow this link and perform the analysis on the -result files it finds there. This allows you to create a group of -models which you wish to study without having to move results around -your machine. - diff --git a/ilamb/ilamb/doc/conf.py b/ilamb/ilamb/doc/conf.py deleted file mode 100644 index 3287ccc1..00000000 --- a/ilamb/ilamb/doc/conf.py +++ /dev/null @@ -1,357 +0,0 @@ -# -*- coding: utf-8 -*- -# -# ILAMB documentation build configuration file, created by -# sphinx-quickstart on Sat Oct 25 15:45:20 2014. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.viewcode', - 'sphinxcontrib.napoleon' -] - -autosummary_generate = True - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. 
-project = u'ILAMB' -copyright = u'2014, Nathan Collier, Forrest Hoffman' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -import ILAMB -version = ILAMB.__version__ -# The full version, including alpha/beta/rc tags. -release = ILAMB.__version__ - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ['_build','_templates'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'sphinxdoc' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = [] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -#html_extra_path = [] - -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. 
-#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' -#html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'ILAMBdoc' - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - ('index', 'ILAMB.tex', u'ILAMB Documentation', - u'Nathan Collier, Forrest Hoffman', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'testpkg', u'ILAMB Documentation', - [u'Nathan Collier, Forrest Hoffman'], 1) -] - -# If true, show URL addresses after external links. 
-#man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ('index', 'ILAMB', u'ILAMB Documentation', - u'Nathan Collier, Forrest Hoffman', 'ILAMB', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False - - -# -- Options for Epub output ---------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = u'ILAMB' -epub_author = u'Nathan Collier, Forrest Hoffman' -epub_publisher = u'Nathan Collier, Forrest Hoffman' -epub_copyright = u'2014, Nathan Collier, Forrest Hoffman' - -# The basename for the epub file. It defaults to the project name. -#epub_basename = u'ILAMB' - -# The HTML theme for the epub output. Since the default themes are not optimized -# for small screen space, using the same theme for HTML and epub output is -# usually not wise. This defaults to 'epub', a theme designed to save visual -# space. -#epub_theme = 'epub' - -# The language of the text. It defaults to the language option -# or 'en' if the language is not set. -#epub_language = '' - -# The scheme of the identifier. Typical schemes are ISBN or URL. -#epub_scheme = '' - -# The unique identifier of the text. This can be a ISBN number -# or the project homepage. -#epub_identifier = '' - -# A unique identification for the text. -#epub_uid = '' - -# A tuple containing the cover image and cover page html template filenames. -#epub_cover = () - -# A sequence of (type, uri, title) tuples for the guide element of content.opf. -#epub_guide = () - -# HTML files that should be inserted before the pages created by sphinx. -# The format is a list of tuples containing the path and title. -#epub_pre_files = [] - -# HTML files shat should be inserted after the pages created by sphinx. -# The format is a list of tuples containing the path and title. -#epub_post_files = [] - -# A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] - -# The depth of the table of contents in toc.ncx. -#epub_tocdepth = 3 - -# Allow duplicate toc entries. -#epub_tocdup = True - -# Choose between 'default' and 'includehidden'. -#epub_tocscope = 'default' - -# Fix unsupported image types using the PIL. -#epub_fix_images = False - -# Scale large images. -#epub_max_image_width = 0 - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#epub_show_urls = 'inline' - -# If false, no index is generated. -#epub_use_index = True diff --git a/ilamb/ilamb/doc/confront.rst b/ilamb/ilamb/doc/confront.rst deleted file mode 100644 index ac6dcde3..00000000 --- a/ilamb/ilamb/doc/confront.rst +++ /dev/null @@ -1,294 +0,0 @@ -Custom Confrontations -===================== - -The ``Confrontation`` object we described in the previous tutorial is -implemented as a python class. This tutorial will assume you have some -familiarity with python and classes. 
We will try to make the concepts -easy to follow, but if you find that you need to learn about *class* -basics, we recommend the python documentation on them `here -`_. - -In this tutorial we will explain the implementation of a custom -``Confrontation`` by way of example. We will detail the code that we -use in the ILAMB system for benchmarking the global net ecosystem -carbon balance. The generic ``Confrontation`` will not work in this -case because: - -* There is no variable in the model outputs which directly compares to - the benchmark datasets. The variable ``nbp`` must be integrated over - the globe for it to be comparable. -* The analysis we want to perform is different than our standard mean - state analysis. We will compare the bias and RMSE of the integrated - quantity, but then we would like to also view the accumulation of - carbon over the time period. - -So we have some special-purpose code to write. We will present here -the implementation bit by bit and explain each function and -section. However, if you are following along and implementing the -class as you read, we recommend you look at the original source which -may be found on our `bitbucket -`_ -site. This is because the amount of tab space gets shifted in the -generation of this document. I will also omit the documentation -strings and imports here to keep the code short. - -The Constructor ---------------- - -The first thing we will do, is define a new class ``ConfNBP`` which -will be derived from the ``Confrontation`` base class. This means that -all the methods and member data of the ``Confrontation`` class will be -part of ``ConfNBP`` automatically. This is helpful, as it means that -the developer only needs to rewrite the functions that must behave -differently in his benchmark. So we define our class by writing:: - - class ConfNBP(Confrontation): - - def __init__(self,**keywords): - - # Ugly, but this is how we call the Confrontation constructor - super(ConfNBP,self).__init__(**keywords) - - # Now we overwrite some things which are different here - self.regions = ['global'] - -We place this class in a file which bears the name of the class -itself, ``ConfNBP.py``. The ``__init__`` function is what is known as -the constructor. A class can be thought of as a template and the -constructor is the function which runs when a new instance is -created. If I were to type:: - - a = Confrontation() - b = Confrontation() - -I would be creating two instances (``a`` and ``b``) of the -``Confrontation`` class and the constructor would run separately for -each of them. The constructor for the ``Confrontation`` class takes in -keywords as arguments. This means that instead of requiring users to -place arguments in a defined order, we allow them to specify arguments -by their names. We did this in the previous tutorial, when we -initialized a ``Confrontation`` in the following way:: - - c = Confrontation(source = os.environ["ILAMB_ROOT"] + "/DATA/rsus/CERES/rsus_0.5x0.5.nc", - name = "CERES", - variable = "rsus") - -The keywords we used here were ``source``, ``name``, and -``variable``. You have a lot of control over what a ``Confrontation`` -does via these keywords. A full list of them is available in the -`documentation -<_generated/ILAMB.Confrontation.Confrontation.html>`_. For the most -part, we want to use the ``Confrontation`` constructor as it is, and -so we could just leave the ``__init__`` function -unimplemented. 
However, one of the keywords of the ``Confrontation`` -constructor is not valid in our benchmark--the ``regions`` -keyword. This is a keyword where a user may specify a list of GFED -regions over which we will perform the analysis. In the case of our -``ConfNBP``, this is not a valid option as the benchmark data is -integrated over the globe. - -For this reason, we implement our own ``__init__`` function where we -manually call the constructor of the ``Confrontation`` class. This is -handled by using the python function ``super``. This references the -super object of our ``ConfNBP`` object and allows us to manually call -its constructor. After this constructor has run, we simply overwrite -the value of the ``regions`` member data to be the only valid value. - -Staging Data ------------- - -We need to implement our own ``stageData`` functionality as models do -not provide us with the integrated ``nbp`` directly. We will go over -its implementation here in pieces. First we get the observational -dataset:: - - def stageData(self,m): - - # get the observational data - obs = Variable(filename = self.source, - variable_name = self.variable, - alternate_vars = self.alternate_vars) - -So you will see first that the function signature has a ``self`` -argument. This will be true of all member functions of our class. This -is a special argument which is used to access all the member data of -the class itself. The second argument is ``m`` which is a instance of -a ``ModelResult``. Just below we use the ``Variable`` constructor to -extract the source data for this benchmark using member data of our -class. The member data ``source`` refers to the full path of the -benchmark dataset, ``variable`` is the name of the variable to extract -from within, and ``alternate_vars`` is a list of alternative names for -variable names which we can accept. By convention, we use the ``obs`` -name to refer to the returned ``Variable``. Next, we need to extract -the same data from the model:: - - # the model data needs integrated over the globe - mod = m.extractTimeSeries(self.variable, - alt_vars = self.alternate_vars) - mod = mod.integrateInSpace().convert(obs.unit) - - obs,mod = il.MakeComparable(obs,mod,clip_ref=True) - - # sign convention is backwards - obs.data *= -1. - mod.data *= -1. - - return obs,mod - -Here we use the ``ModelResult`` instance ``m`` to extract the -variable, and immediately integrate it over all space. We also ensure -that the units match the observations and, again by convention, we -refer to this in a variable we call ``mod``. Then we use the function -`MakeComparable <_generated/ILAMB.ilamblib.MakeComparable.html>`_ to ensure -that both the ``obs`` and ``mod`` variables are on the same time -frame, trimming away the non-overlapping times. Finally we multiply the -data associated with the observations and models by a negative one -because of a unwanted sign convention. - -The main concept of the ``stageData`` function is that you are passed -a ``ModelResult`` and you need to return two ``Variables`` which -represent comparable quantities from the observational and model -datasets. The ILAMB system does not care how you came about these -quantities. Here we have used more of the ILAMB package to create the -quantities we wish to compare. However, you may prefer to use other -tools or even interface to more complex methods of extracting relevant -information. The ILAMB package simply defines an interface which makes -the results of such data manipulation usable in a consistent system. 
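Putting the fragments above together, the whole method is only a few lines; this listing is simply the pieces already discussed, consolidated for reference::

    def stageData(self,m):
        # observational data, read directly from the benchmark source file
        obs = Variable(filename       = self.source,
                       variable_name  = self.variable,
                       alternate_vars = self.alternate_vars)
        # model data, integrated over the globe and converted to the obs unit
        mod = m.extractTimeSeries(self.variable,
                                  alt_vars = self.alternate_vars)
        mod = mod.integrateInSpace().convert(obs.unit)
        # trim both variables to the overlapping time frame
        obs,mod = il.MakeComparable(obs,mod,clip_ref=True)
        # correct the unwanted sign convention
        obs.data *= -1.
        mod.data *= -1.
        return obs,mod
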
- -Confront --------- - -We also need to implement our own ``confront`` functionality. This is -because most of our `mean state -<./_generated/ILAMB.ilamblib.AnalysisMeanState.html>`_ is not relevant -for our benchmark, and we would like to study the accumulation of -carbon which is not part of the procedure. As before we will break up -the ``confront`` function we implemented and explain it in sections:: - - def confront(self,m): - - # Grab the data - obs,mod = self.stageData(m) - -As with the ``stageData`` function, the ``confront`` function takes in -a ``ModelResult`` instance ``m`` and immediately calls the -``stageData`` function we just implemented. The observational dataset -and model result are returned as represented as ``Variables`` and -named ``obs`` and ``mod``, respectively. For both datasets, we want to -study the accumulated amount of carbon over the time period:: - - obs_sum = obs.accumulateInTime().convert("Pg") - mod_sum = mod.accumulateInTime().convert("Pg") - -as well as compare the mean values over the time period:: - - obs_mean = obs.integrateInTime(mean=True) - mod_mean = mod.integrateInTime(mean=True) - -and then the bias and RMSE:: - - bias = obs.bias(mod) - rmse = obs.rmse(mod) - -The functions, ``accumulateInTime``, ``convert``, ``integrateInTime``, -``bias``, and ``rmse`` are all member functions of the `Variable -<_generated/ILAMB.Variable.Variable.html>`_ class. So you can see that -this keeps analysis clean, short, and human readable. This handles the -majority of the analysis which we want to perform in this -confrontation. However, the ILAMB system is geared towards determining -a score from the analysis results. In this case, we will score a model -based on the bias and the RMSE in the following way: - - .. math:: S_{\text{bias}} = e^{-\left| \frac{\int \left(obs(t) - mod(t)\right)\ dt }{\int obs(t)\ dt } \right|} - .. math:: S_{\text{RMSE}} = e^{-\sqrt{ \frac{\int \left(obs(t) - mod(t)\right)^2\ dt }{\int obs(t)^2\ dt } }} - -This is accomplished in the following way:: - - obs_L1 = obs.integrateInTime() - dif_L1 = deepcopy(obs) - dif_L1.data -= mod.data - dif_L1 = dif_L1.integrateInTime() - bias_score = Variable(name = "Bias Score global", - unit = "1", - data = np.exp(-np.abs(dif_L1.data/obs_L1.data))) - -for the bias score and:: - - obs_L2 = deepcopy(obs) - obs_L2.data *= obs_L2.data - obs_L2 = obs_L2.integrateInTime() - dif_L2 = deepcopy(obs) - dif_L2.data = (dif_L2.data-mod.data)**2 - dif_L2 = dif_L2.integrateInTime() - rmse_score = Variable(name = "RMSE Score global", - unit = "1", - data = np.exp(-np.sqrt(dif_L2.data/obs_L2.data))) - -for the RMSE score. The code here is a bit more ugly than the previous -and reflects ways in which the ``Variable`` object needs to grow. At -this point the analysis results are finished and we are ready to save -things into result files. First, we will rename the variables in the -following way:: - - obs .name = "spaceint_of_nbp_over_global" - mod .name = "spaceint_of_nbp_over_global" - obs_sum .name = "accumulate_of_nbp_over_global" - mod_sum .name = "accumulate_of_nbp_over_global" - obs_mean.name = "Period Mean global" - mod_mean.name = "Period Mean global" - bias .name = "Bias global" - rmse .name = "RMSE global" - -We rename the variables because the ILAMB plotting and HTML generation -engine is built to recognize certain keywords in the variable name and -subsequently render the appropriate plots. 
Since our ``obs`` and -``mod`` variables represent spatial integrals of ``nbp``, we name them -with the keyword ``spaceint``. The ``accumulate`` keyword also will -cause a plot to automatically be generated and placed in the HTML -output in a predetermined location. This feature makes the -presentation of results trivial. The scalar quantities are also -changed such that their names reflect the table headings of the HTML -output. - -Finally we dump these variables into netCDF4 files. The first file -corresponds to the current model being analyzed. The dataset is opened -which will be saved into a logical path, with descriptive names. The -``Variable`` class has support for simply asking that an instanced be -dumped into an open dataset. Any dimension information or units are -automatically recorded:: - - results = Dataset("%s/%s_%s.nc" % (self.output_path,self.name,m.name),mode="w") - results.setncatts({"name" :m.name, "color":m.color}) - mod .toNetCDF4(results) - mod_sum .toNetCDF4(results) - mod_mean .toNetCDF4(results) - bias .toNetCDF4(results) - rmse .toNetCDF4(results) - bias_score.toNetCDF4(results) - rmse_score.toNetCDF4(results) - results.close() - -We also write out information from the benchmark dataset as -well. However, since confrontations can be run in parallel, only the -confrontation that is flagged as the master need write this output:: - - if self.master: - results = Dataset("%s/%s_Benchmark.nc" % (self.output_path,self.name),mode="w") - results.setncatts({"name" :"Benchmark", "color":np.asarray([0.5,0.5,0.5])}) - obs .toNetCDF4(results) - obs_sum .toNetCDF4(results) - obs_mean.toNetCDF4(results) - results.close() - -That is it ----------- - -While more involved than simply adding a dataset or model result to -the analysis, that is all we need to implement for our custom -confrontation. As you can see, we managed to encapsulate all of the -relevant code into one file which interfaces seamlessly with the rest -of the ILAMB system. In the case of ``ConfNBP.py``, we have included -it in the main repository for the ILAMB package. However, users may -create their own confrontations and host/maintain them separately for -use with the system. We see this as a first step towards a more -general framework for community-driven benchmarking. diff --git a/ilamb/ilamb/doc/custom_regions.rst b/ilamb/ilamb/doc/custom_regions.rst deleted file mode 100644 index 45ccbc6a..00000000 --- a/ilamb/ilamb/doc/custom_regions.rst +++ /dev/null @@ -1,124 +0,0 @@ -Defining Custom Regions -======================= - -In the `tutorial <./ilamb_run.html>`_ explaining the options of -``ilamb-run``, we highlight that custom regions may be defined in two -ways. The first is region definition by latitude and longitude bounds -which can be done in the form of a text file in the following comma -delimited format:: - - #label,name ,lat_min,lat_max,lon_min,lon_max - usa ,Continental US, 24, 50, -126, -66 - alaska,Alaska , 53, 72, -169, -129 - -The first column is the label to be used, followed by the region -name. Then the minimum and maximum bounds on the latitude and -longitude are specified. Note that longitude values are expected on -the [-180,180] interval. In this current iteration regions cannot be -specified which span the international dateline. - -The second method is by creating a netCDF4 file which will be used -internally to create a mask for each region. This we will demonstrate -by encoding the above regions but in this format. 
First we create the -spatial grid on which we will define the regions. - -.. code-block:: python - - from netCDF4 import Dataset - import numpy as np - - # Create the lat/lon dimensions - res = 0.5 - latbnd = np.asarray([np.arange(- 90 , 90 ,res), - np.arange(- 90+res, 90+0.01,res)]).T - lonbnd = np.asarray([np.arange(-180 ,180 ,res), - np.arange(-180+res,180+0.01,res)]).T - lat = latbnd.mean(axis=1) - lon = lonbnd.mean(axis=1) - -Next we create an array of integers which we will use to mark the -regions we wish to encode. This is essentially painting by numbers. We -initialize the array to a missing value which we will encode later. - -.. code-block:: python - - # Create the number array, initialize to a missing value - miss = -999 - ids = np.ones((lat.size,lon.size),dtype=int)*miss - -Then we paint the regions we wish to encode using the latitude and -longitude bounds which were in the sample text file above. This part -will vary depending on how you wish to define regions. For example, -our regions here will still appear to be defined by latitude and -longitude bounds because that is how we are creating the mask. You may -find other sources for your region definitions which will allow more -precise representations. Note that this method of definition means -that regions cannot overlap in a single file. If you need to define -overlapping regions, put each region in a separate file. - -.. code-block:: python - - # Paint the Continental US with a `0` - ids[np.where(np.outer((lat>= 24)*(lat<= 50), - (lon>=-126)*(lon<=- 66)))] = 0 - - # Paint Alaska with a `1` - ids[np.where(np.outer((lat>= 53)*(lat<= 72), - (lon>=-169)*(lon<=-129)))] = 1 - -Next we convert the ``numpy`` integer array to a masked array where we -mask by the missing value we defined above. Then we create an array of -labels to use as indentifiers for the integer numbers we defined. A -``0`` in the ``ids`` array will correspond to the ``USA`` region and a -``1`` to the ``Alaska`` region. These lower case version of these -names will be used as region labels. - -.. code-block:: python - - # Convert the ids to a masked array - ids = np.ma.masked_values(ids,miss) - - # Create the array of labels - lbl = np.asarray(["USA","Alaska"]) - -Finally we encode the netCDF4 dataset. There are a few important -details in this code. The first is to use the ``numpy`` datatypes of -the arrays when creating netCDF4 variables. This is especially -important in encoding the ``labels`` array as it will ensure the -string array is created properly. The other important detail is to -encode the ``labels`` attribute of the ``I`` variable. This is what -tells the ILAMB system where to find the labels for the integers -defined in the array. - -.. code-block:: python - - # Create netCDF dimensions - dset = Dataset("regions.nc",mode="w") - dset.createDimension("lat" ,size=lat.size) - dset.createDimension("lon" ,size=lon.size) - dset.createDimension("nb" ,size=2 ) - dset.createDimension("n" ,size=lbl.size) - - # Create netCDF variables - X = dset.createVariable("lat" ,lat.dtype,("lat" )) - XB = dset.createVariable("lat_bounds" ,lat.dtype,("lat","nb" )) - Y = dset.createVariable("lon" ,lon.dtype,("lon" )) - YB = dset.createVariable("lon_bounds" ,lon.dtype,("lon","nb" )) - I = dset.createVariable("ids" ,ids.dtype,("lat","lon")) - L = dset.createVariable("labels" ,lbl.dtype,("n" )) - - # Load data and encode attributes - X [...] = lat - X.units = "degrees_north" - XB[...] = latbnd - - Y [...] = lon - Y.units = "degrees_east" - YB[...] = lonbnd - - I[...] 
= ids - I.labels= "labels" - - L[...] = lbl - - dset.close() diff --git a/ilamb/ilamb/doc/first_steps.rst b/ilamb/ilamb/doc/first_steps.rst deleted file mode 100644 index 3f5c1f7b..00000000 --- a/ilamb/ilamb/doc/first_steps.rst +++ /dev/null @@ -1,233 +0,0 @@ -First Steps -=========== - -This software comes with no data. It is meant to be generic software -which facilitates the automatic confrontation of model results with -benchmark observational datasets. However, the best way to learn how -to use this software is with actual data. To this end we have a -relatively small sample which you can `download -`_. Extract -this file to a location of your choosing by the following:: - - tar -xvf minimal_ILAMB_data.tgz - cd ILAMB_sample - export ILAMB_ROOT=$PWD - -We use this environment variable in the ILAMB package to point to the -top level directory of the data. Later, when we reference specific -data locations, we can specify them relative to this path. This both -shortens the path and makes the configuration portable to other -systems or data locations. - -The following tree represents the organization of the contents of this -sample data:: - - ILAMB_sample/ - ├── DATA - │ ├── albedo - │ │ └── CERES - │ │ └── albedo_0.5x0.5.nc - │ └── rsus - │ └── CERES - │ └── rsus_0.5x0.5.nc - └── MODELS - └── CLM40cn - ├── rsds - │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - └── rsus - └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc - -There are two main branches in this directory. The first is the -``DATA`` directory--this is where we keep the observational datasets -each in a subdirectory bearing the name of the variable. While not -strictly necesary to follow this form, it is a convenient -convention. The second branch is the ``MODEL`` directory in which we -see a single model result from CLM. - -Configure Files ---------------- - -Now that we have data, we need to setup a file which the ILAMB package -will use to initiate a benchmark study. There is such a file which -comes with the software package in the ``demo`` directory called -``sample.cfg``. Navigate to the demo directory and open this file or view it `online -`_. We also reproduce it here for the purpose of this tutorial:: - - # This configure file specifies the variables - - [h1: Radiation and Energy Cycle] - bgcolor = "#FFECE6" - - [h2: Surface Upward SW Radiation] - variable = "rsus" - - [CERES] - source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" - - [h2: Albedo] - variable = "albedo" - derived = "rsus/rsds" - - [CERES] - source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" - -We note that while the ILAMB package is written in python, this file -contains no python and is written in a small configure language of our -invention. Here we will go over this file line by line and explain how -each entry functions. - -At the top of the file, you see the following lines:: - - [h1: Radiation and Energy Cycle] - bgcolor = "#FFECE6" - -This is a tag that we use to tell the system that we will have a top -level heading ``h1`` which we call *Radiation and Energy Cycle*. While -you can name this section anything of your choosing, we have chosen -this name as it is descriptive of the benchmarking activities we will -perform. Also note that you may specify a background color here in -hexadecimal format (we found this site to be helpful to play around -with `colors `_). This color will -be used in the output which we will show later. 
It is important to -understand that heading are hierarchical--this heading owns everything -underneath it until the next ``h1`` tag is found or the file ends. We -use ``h1`` level headings to group variables of a given type to better -organize the output. - -Below this, you will notice a second level heading which appears like -this:: - - [h2: Surface Upward SW Radiation] - variable = "rsus" - -We will be looking at radiation here. The ``variable`` tag is the name -of the variable inside the dataset which represents the variable of -interest. Here ``rsus`` is a standard name used to represent -*Surface Upward Shortwave Radiation*. We use ``h2`` headings to -represent a variable which we wish to compare. - -The next entry in the file appears as the following:: - - [CERES] - source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" - -First, notice the absence of a ``h1`` or ``h2`` tag. This indicates -that this entry is a particular dataset of a given variable (our -``h2`` heading) of a given grouping (our ``h1`` heading). We have -named it CERES as that is the name of the data source we have -included. We only have to specify the location of the source dataset, -relative to the environment variable we set earlier, ``ILAMB_ROOT``. - -At this point we feel it important to mention that this is the minimum -required to setup a benchmark study in this system. If you have an -observational dataset which directly maps to a variable which is -output by models as ``rsus`` is, you are done. - -However, it is possible that your dataset has no direct analog in the -list of variables which models output and some manipulation is -needed. We have support for when your dataset corresponds to an -algebraic function of model variables. Consider the remaining entries -in our sample:: - - [h2: Albedo] - variable = "albedo" - derived = "rsus/rsds" - - [CERES] - source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" - -We have done two things here. First we started a new ``h2`` heading -because we will now look at albedo. But albedo is not a variable which -is included in our list of model outputs (see the tree above). However -we have both upward and downward radiation, so we could compute -albedo. This is accomplished by adding the ``derived`` tag and -specifying the algebraic relationship. When our ILAMB system looks for -the albedo variable for a given model and cannot find it, it will try -to find the variables which are the arguments of the expression you -type in the ``derived`` tag. It will then combined them automatically -and resolve unit differences. - -The configuration language is small, but allows you to change a lot of -the behavior of the system. The full functionality is documented `here -`_. Non-algebraic manipulations are also possible, but -will be covered in a more advanced tutorial. - -Running the Study ------------------ - -Now that we have the configuration file set up, you can run the study -using the ``ilamb-run`` script. Executing the command:: - - ilamb-run --config sample.cfg --model_root $ILAMB_ROOT/MODELS/ --regions global - -If you are on some institutional resource, you may need to launch the -above command using a submission script, or request an interactive -node. As the script runs, it will yield output which resembles the -following:: - - Searching for model results in /Users/ncf/sandbox/ILAMB_sample/MODELS/ - - CLM40cn - - Parsing config file sample.cfg... - - SurfaceUpwardSWRadiation/CERES Initialized - Albedo/CERES Initialized - - Running model-confrontation pairs... 
- - SurfaceUpwardSWRadiation/CERES CLM40cn Completed 37.3 s - Albedo/CERES CLM40cn Completed 44.7 s - - Finishing post-processing which requires collectives... - - SurfaceUpwardSWRadiation/CERES CLM40cn Completed 3.3 s - Albedo/CERES CLM40cn Completed 3.3 s - - Completed in 91.8 s - -What happened here? First, the script looks for model results in the -directory you specified in the ``--model_root`` option. It will treat -each subdirectory of the specified directory as a separate model -result. Here since we only have one such directory, ``CLM40cn``, it -found that and set it up as a model in the system. Next it parsed the -configure file we examined earlier. We see that it found the CERES -data source for both variables as we specified it. If the source data -was not found or some other problem was encountered, the green -``Initialized`` will appear as red text which explains what the -problem was (most likely ``MisplacedData``). If you encounter this -error, make sure that ``ILAMB_ROOT`` is set correctly and that the -data really is in the paths you specified in the configure file. - -Next we ran all model-confrontation pairs. In our parlance, a -*confrontation* is a benchmark observational dataset and its -accompanying analsys. We have two confrontations specified in our -configure file and one model, so we have two entries here. If the -analysis completed without error, you will see a green ``Completed`` -text appear along with the runtime. Here we see that ``albedo`` took a -few seconds longer than ``rsus``, presumably because we had the -additional burden of reading in two datasets and combining them. - -The next stage is the post-processing. This is done as a separate loop -to exploit some parallelism. All the work in a model-confrontation -pair is purely local to the pair. Yet plotting results on the same -scale implies that we know the maxmimum and minimum values from all -models and thus requires the communcation of this information. Here, -as we are plotting only over the globe and not extra regions, the -plotting occurs quickly. - -Viewing the Output ------------------- - -The whole process generated a new directory of results in the demo -dorectory called ``_build``. To browse the results, open the -``_build/index.html`` file in any browser and you will see a webpage -with a summary image in the center. As we have so few variables and -models, this image will not make much sense at this point. Instead, -click the middle tab called ``Results Table``. From here you will see -both variables which we compared. Clicking on eithe will expand the -row to show the data sources which were part of the study. If you -further click on the CERES link in any row, it will take you to the -plots and tabulated information from the study. - - diff --git a/ilamb/ilamb/doc/format_data.rst b/ilamb/ilamb/doc/format_data.rst deleted file mode 100644 index 0a3e3043..00000000 --- a/ilamb/ilamb/doc/format_data.rst +++ /dev/null @@ -1,220 +0,0 @@ -Formatting a Benchmark Dataset -============================== - -The ILAMB system is designed to accept files in the form of netCDF4 -datasets which follow the `CF Conventions -`_. These conventions define metadata that -provide a definitive description of what the data in each variable -represents, and the spatial and temporal properties of the data. This -enables ILAMB to decide how to create a commensurate quantity from a -model's output results. 
- -While it is sufficient to follow the CF conventions when building your -observational dataset, ILAMB does not rigidly require full adherence -to this standard. That is to say, it is only necessary to have some of -the required fields and attributes. In this tutorial we will -demonstrate encoding a few demonstration files using python. However, -the samples only demonstrate what is needed for ILAMB to function and -can be replicated using other tools (i.e. Matlab, NCL). - -Globally gridded data ---------------------- - -In this sample we will create a random variable representing monthly -mean values from 1950-1960 on a 2 degree global grid. First we open a -dataset for writing and then create the time dimension data. - -.. code-block:: python - - from netCDF4 import Dataset - import numpy as np - - # Open a dataset for writing - dset = Dataset("global_sample.nc",mode="w") - - # Create temporal dimension - nyears = 10 - month_bnd = np.asarray([0,31,59,90,120,151,181,212,243,273,304,334,365],dtype=float) - tbnd = np.asarray([((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[:-1]).flatten(), - ((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[+1:]).flatten()]).T - tbnd += (1950-1850)*365 - t = tbnd.mean(axis=1) - -While the ``numpy`` portion of this code may be confusing, in concept -we are creating a ``tbnd`` array with a shape ``(120,2)`` which -contains the beginning and ending day of each month from 1950 -to 1960. Subsequently we compute a time array ``t`` of shape ``(120)`` -as the mean value between each of these bounds. - -Encoding the bounds of the time dimension is an important part of -creating the dataset for ILAMB. Many modeling centers have different -conventions as to where a given ``t`` is reported relative to the -interval ``tbnd``. By specifying the time bounds, ILAMB can precisely -match model output to the correct time interval. - -Consider encoding the time dimension even if your data is only -spatial. Many times the observational data we have may be a sparse -collection of points across a decade of observations. We mean to -compare these observations to a mean of the model result across some -time span. In this case, you can build a ``tbnd`` array of shape -``(1,2)`` where the bounds defines the span across which it is -appropriate to compare models. When ILAMB reads in this dataset, it -will detect a mistmatch is the temporal resolution of the model output -and your observational dataset and automatically coarsen the model -output across the specified time bounds. - -Now we move on to the spatial grid and the data itself. - -.. code-block:: python - - # Create spatial dimension - res = 2. - latbnd = np.asarray([np.arange(- 90 , 90 ,res), - np.arange(- 90+res, 90+0.01,res)]).T - lonbnd = np.asarray([np.arange(-180 ,180 ,res), - np.arange(-180+res,180+0.01,res)]).T - lat = latbnd.mean(axis=1) - lon = lonbnd.mean(axis=1) - - # Create some fake data - data = np.ma.masked_array(np.random.rand(t.size,lat.size,lon.size)) - -In this case we again use ``numpy`` to create bounding arrays for the -latitude and longitude. As with the temporal dimension, this is -preferred as it removes ambiguity and improves the accuracy which -ILAMB can deliver. The fake data here is just full of random numbers -in this case with no mask. Normally this data would come from some -other source. This is typically the most time consuming part of the -dataset creation process as data providers seldom provide their -datasets in netCDF format. 
- -Once you have all the information in memory, then we turn to encoding -the netCDF4 file. First we create all the dimensions and variables we -will use. For more information on these functions, consult the -`netcdf4-python `_ -documentation. - -.. code-block:: python - - # Create netCDF dimensions - dset.createDimension("time",size= t.size) - dset.createDimension("lat" ,size=lat.size) - dset.createDimension("lon" ,size=lon.size) - dset.createDimension("nb" ,size=2 ) - - # Create netCDF variables - T = dset.createVariable("time" ,t.dtype ,("time" )) - TB = dset.createVariable("time_bounds",t.dtype ,("time","nb")) - X = dset.createVariable("lat" ,lat.dtype ,("lat" )) - XB = dset.createVariable("lat_bounds" ,lat.dtype ,("lat","nb" )) - Y = dset.createVariable("lon" ,lon.dtype ,("lon" )) - YB = dset.createVariable("lon_bounds" ,lon.dtype ,("lon","nb" )) - D = dset.createVariable("var" ,data.dtype,("time","lat","lon")) - -Finally we load the netCDF4 Variables (``T,TB,X,XB,Y,YB,D``) with the -corresponding numerical values (``t,tbnd,lat,latbnd,lon,lonbnd,data``) -we computed in previous steps. We also encode a few attributes which -ILAMB will need as a bare minimum to correctly interpret the -values. Any units provided will need to adhere to the CF convention, see -`here -`_. - -.. code-block:: python - - # Load data and encode attributes - T [...] = t - T.units = "days since 1850-01-01" - T.calendar = "noleap" - T.bounds = "time_bounds" - TB[...] = tbnd - - X [...] = lat - X.units = "degrees_north" - XB[...] = latbnd - - Y [...] = lon - Y.units = "degrees_east" - YB[...] = lonbnd - - D[...] = data - D.units = "kg m-2 s-1" - dset.close() - -Site data ---------- - -Encoding data from a site or collection of sites is similar with two -main distinctions. First, there is a ``data`` dimension referring to -the number of sites in the set. The latitude and longitude arrays are -of this dimension. Second, the time array must span the maximum -coverage of the site collection. Consider a sample set here consisting -of two sites: site A which has monthly mean data from 1950 and site B -with monthly mean data from 1951. One thing to emphasize is that while -not part of the units description, these times need to be in UTC -format. This can be problematic as sites tend to store their data in a -local time coordinate. The time portion of our script is similar. - -.. code-block:: python - - from netCDF4 import Dataset - import numpy as np - - # Open a dataset for writing - dset = Dataset("global_sample.nc",mode="w") - - # Create temporal dimension - nyears = 2 - month_bnd = np.asarray([0,31,59,90,120,151,181,212,243,273,304,334,365],dtype=float) - tbnd = np.asarray([((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[:-1]).flatten(), - ((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[+1:]).flatten()]).T - tbnd += (1950-1850)*365 - t = tbnd.mean(axis=1) - -However the spatial portion just consists of two locations and -contains no bounds. The data array is then a 2D array where the first -dimension is the total number of time intervals represented and the -second dimension is the number of sites. The data array itself needs -to be masked over regions where each site contains no data. ILAMB will -apply this mask to the model results which it extracts. - -.. 
code-block:: python - - lat = np.asarray([- 35.655,-25.0197]) - lon = np.asarray([ 148.152, 31.4969]) - - data = np.ma.masked_array(np.zeros((t.size,2)),mask=True) # masked array of zeros - data[:12,0] = np.random.rand(12) # site A's random data - data[12:,1] = np.random.rand(12) # site B's random data - -As before this is the step that is the most complicated as it involves parsing text files into this format. Finally we output again the dimensions and variables to the netCDF4 file. - -.. code-block:: python - - # Create netCDF dimensions - dset.createDimension("time",size=t.size) - dset.createDimension("data",size=2 ) - dset.createDimension("nb" ,size=2 ) - - # Create netCDF variables - T = dset.createVariable("time" ,t.dtype ,("time" )) - TB = dset.createVariable("time_bounds",t.dtype ,("time","nb" )) - X = dset.createVariable("lat" ,lat.dtype ,("data" )) - Y = dset.createVariable("lon" ,lon.dtype ,("data" )) - D = dset.createVariable("var" ,data.dtype,("time","data")) - - # Load data and encode attributes - T [...] = t - T.units = "days since 1850-01-01" - T.calendar = "noleap" - T.bounds = "time_bounds" - TB[...] = tbnd - - X [...] = lat - X.units = "degrees_north" - - Y [...] = lon - Y.units = "degrees_east" - - D[...] = data - D.units = "kg m-2 s-1" - dset.close() diff --git a/ilamb/ilamb/doc/ilamb_doctor.rst b/ilamb/ilamb/doc/ilamb_doctor.rst deleted file mode 100644 index 97d64d13..00000000 --- a/ilamb/ilamb/doc/ilamb_doctor.rst +++ /dev/null @@ -1,61 +0,0 @@ -Diagnosing Missing Model Values with ``ilamb-doctor`` -===================================================== - -In a previous `tutorial <./add_model.html>`_ we covered how a model -can be added to the analysis. However, it can be challenging to know -which variables are needed. To this end we have created a tool called -``ilamb-doctor`` which tries to diagnose what is incorrect or missing -from a given analysis. It takes options similar to ``ilamb-run`` and -is used in the following way:: - - [ILAMB/test]$ ilamb-doctor --config test.cfg --model_root ${ILAMB_ROOT}/MODELS/CLM - - Searching for model results in /Users/ncf/ILAMB//MODELS/CLM - - CLM40n16r228 - CLM45n16r228 - CLM50n18r229 - - We will now look in each model for the variables in the ILAMB - configure file you specified (test.cfg). The color green is used to reflect - which variables were found in the model. The color red is used to - reflect that a model is missing a required variable. 
- - Biomass/GlobalCarbon CLM40n16r228 biomass or cVeg - GrossPrimaryProductivity/Fluxnet CLM40n16r228 gpp - GrossPrimaryProductivity/GBAF CLM40n16r228 gpp - GlobalNetEcosystemCarbonBalance/Hoffman CLM40n16r228 nbp - NetEcosystemExchange/GBAF CLM40n16r228 gpp, rh, and ra - TerrestrialWaterStorageAnomaly/GRACE CLM40n16r228 tws - Albedo/MODIS CLM40n16r228 rsus and rsds - SurfaceAirTemperature/CRU CLM40n16r228 tas - Precipitation/GPCP2 CLM40n16r228 pr - Biomass/GlobalCarbon CLM45n16r228 biomass or cVeg - GrossPrimaryProductivity/Fluxnet CLM45n16r228 gpp - GrossPrimaryProductivity/GBAF CLM45n16r228 gpp - GlobalNetEcosystemCarbonBalance/Hoffman CLM45n16r228 nbp - NetEcosystemExchange/GBAF CLM45n16r228 gpp, rh, and ra - TerrestrialWaterStorageAnomaly/GRACE CLM45n16r228 tws - Albedo/MODIS CLM45n16r228 rsus and rsds - SurfaceAirTemperature/CRU CLM45n16r228 tas - Precipitation/GPCP2 CLM45n16r228 pr - Biomass/GlobalCarbon CLM50n18r229 biomass or cVeg - GrossPrimaryProductivity/Fluxnet CLM50n18r229 gpp - GrossPrimaryProductivity/GBAF CLM50n18r229 gpp - GlobalNetEcosystemCarbonBalance/Hoffman CLM50n18r229 nbp - NetEcosystemExchange/GBAF CLM50n18r229 gpp, rh, and ra - TerrestrialWaterStorageAnomaly/GRACE CLM50n18r229 tws - Albedo/MODIS CLM50n18r229 rsus and rsds - SurfaceAirTemperature/CRU CLM50n18r229 tas - Precipitation/GPCP2 CLM50n18r229 pr - -Here we have run the command on some inputs in our ``test`` -directory. You will see a list of the confrontations we run and the -variables which are required or their synonyms. What is missing in -this tutorial is the text coloring which will indicate if a given -model has the required variables. - -In the future we will add more intelligence to this tool to help -diagnose potential problems. - - diff --git a/ilamb/ilamb/doc/ilamb_fetch.rst b/ilamb/ilamb/doc/ilamb_fetch.rst deleted file mode 100644 index 8500eb07..00000000 --- a/ilamb/ilamb/doc/ilamb_fetch.rst +++ /dev/null @@ -1,37 +0,0 @@ -Obtaining the ILAMB Data with ``ilamb-fetch`` -============================================= - -In previous tutorials we provided links to download a small dataset -for the purposes of demonstration. However we have another mechanism -for downloading the observational datasets which ILAMB needs. From a -commandline prompt, run ``ilamb-fetch``. You should see output similar -to the following:: - - Comparing remote location: - - http://ilamb.ornl.gov/ILAMB-Data/ - - To local location: - - ./ - - I found the following files which are missing, out of date, or corrupt: - - .//DATA/twsa/GRACE/twsa_0.5x0.5.nc - .//DATA/rlus/CERES/rlus_0.5x0.5.nc - ... - - Download replacements? [y/n] - -This tool looks at a remote location (by default the location of the -land datasets) and compares it to a local location (by default -``ILAMB_ROOT`` or ``./``). It checks for the presence and version of -the data on your local machine and populates a list for download. The -tool will then prompt you to rerun to check for file validity. - -This tool can be used to download other data collections as well. 
If -you need the ocean IOMB data, then you can change the remote location -by running:: - - ilamb-fetch --remote_root http://ilamb.ornl.gov/IOMB-Data/ - diff --git a/ilamb/ilamb/doc/ilamb_run.rst b/ilamb/ilamb/doc/ilamb_run.rst deleted file mode 100644 index 7776126d..00000000 --- a/ilamb/ilamb/doc/ilamb_run.rst +++ /dev/null @@ -1,177 +0,0 @@ -Controlling the Analysis with ``ilamb-run`` Options -=================================================== - -While the basic operation of ``ilamb-run`` is covered in previous -tutorials, there are more options which can be used to control what -happens in a run. In this tutorial we will describe these options and -motivate when they are useful. - -Limiting the analysis ---------------------- - -The configure file specifies the confrontations which will be -performed. However, for many reasons in the development and debugging -process it may be advantageous to run ILAMB on only a subset of the -configure file. You can control this by specifying strings which must -be in the confrontation longname. For example, consider the following -arguments:: - - ilamb-run --config ilamb.cfg --model_root ${ILAMB_ROOT}/MODELS --confrontation CERES - -This line will run only the CERES confrontations found in the -``ilamb.cfg``. The same can be achieved for models with the -``--models`` option. In this case, you must specify the model names -which you wish to run against in a given run. - -To see how this is useful, imagine you have 3 models in your -``--model_root`` directory, but for one you needed to replace all the -model result files. So you need to rerun ILAMB, but not on all three -models. With this option, you can rerun ILAMB on just the needed -model, saving time. If this model name is ``modelC``, then the option -would be:: - - ilamb-run --config ilamb.cfg --model_root ${ILAMB_ROOT}/MODELS --models modelC --clean - -The ``--clean`` option here tells ILAMB to rerun the analysis even if -intermediate files are present, essentially cleaning out the -``modelC`` ILAMB contents and recomputing them, while leaving the rest -of the models untouched. - -Defining models ---------------- - -There are two ways to define models for an analysis. The first was -covered in previous tutorials. The ``--model_root`` option is used to -specify a location whose subdirectories (not recursive) are -initialized as separate models. This is the recommended method as it -is simple and fast. - -One issue that comes up is that model developers want to run ILAMB -during the development process as a sanity check on model -performance. This means that model results may not be available in the -contemporary period. The ``--model_year`` option can be used to shift -the time of all models in an analysis by a fixed number of years. So -if a model run starts in 1850, but you wish to treat these results as -if they were the year 2000 (for comparing to some dataset), then the -appropriate command is ``--model_year 1850 2000``. - -While helpful, we also acknowledge that globally applying a shift in -time across all models might not be desirable. It is likely that a -user has cached the results from previous versions of the model which -have been spun up and run over the contemporary period. To this end we -provide a more detailed model setup option ``--model_setup -file.txt``. 
The contents of ``file.txt`` could look something like the -following:: - - # Model Name, Location of Files , Shift From, Shift To - CLM40 , ABSOLUTE/PATH/TO/CLM40 - CLM45 , ABSOLUTE/PATH/TO/CLM45 - CLM5X , ABSOLUTE/PATH/TO/CLM5X, 1850 , 2000 - CLM5Y , ABSOLUTE/PATH/TO/CLM5Y, 1850 , 2000 - -The text file is in simple comma delimited form with either 2 or 4 -columns. Lines which begin with ``#`` will be ignored. The first -column is the name which you wish to assign to the model and the -second is the absolute path of the results. The third and fourth -columns define the shift in years for each model. If there are only -two columns of data, we will not apply a shift. - -To add some context, this option may be useful in the model -development process. In our sample setup, we have two model versions -CLM4 and CLM4.5 whose results are archived and will not be changing -and thus do not need time shifted. We have set up two versions of CLM5, -X and Y which represent perhaps different parameterization choices, -shifted because we have not spun these models up. The ILAMB results -should be interpreted carefully, but comparing two parameterizations -in this way might provide insight into key differences. - -Regions -------- - -The ILAMB analysis can be performed on an arbitrary number of regions -which may be defined in many ways. The ILAMB package comes with a set -of these regions predefined which are used in the `Global Fire -Emissions Database `_. They are: - - * bona, Boreal North America - * tena, Temperate North America - * ceam, Central America - * nhsa, Northern Hemisphere South America - * shsa, Southern Hemisphere South America - * euro, Europe - * mide, Middle East - * nhaf, Northern Hemisphere Africa - * shaf, Southern Hemisphere Africa - * boas, Boreal Asia - * ceas, Central Asia - * seas, Southeast Asia - * eqas, Equatorial Asia - * aust, Australia - -The first entry in the above list is a region label. To avoid -confusion these should not have spaces or special characters. The -second entry is the name itself which will appear in the pull down -menus on the webpage output. To run the ILAMB analysis over particular -regions, use the ``--regions`` option and include the region labels -delimited by spaces. - -As we anticipate that users will desire to define their own regions, -we have provided this capability in two forms. The first is region -definition by latitude and longitude bounds which can be done in the -form of a text file in the following comma delimited format:: - - #label,name ,lat_min,lat_max,lon_min,lon_max - usa,Continental US, 24, 50, -126, -66 - -Additional rows in the same format may be included to define more -regions in the same file. The first column is the label to be used, -followed by the region name. Then the minimum and maximum bounds on -the latitude and longitude are specified. Note that longitude values -are expected on the [-180,180] interval. In this current iteration -regions cannot be specified which span the international dateline. - -The second form is by creating a mask using a netCDF4 file. We will go -into more detail about the format of the netCDF4 file for defining -masks in its own `tutorial <./custom_regions.html>`_. 
So if the sample -text file above is called ``regions.txt`` and we have a netCDF4 file -called ``amazon.nc`` with a region label ``amazon``, then the ILAMB -analysis can be performed over additional regions by specifying:: - - --define_regions regions.txt amazon.nc --regions global usa amazon - -In its current form, ILAMB expects that the analysis will be performed -over at least the global region. All overall scores are based on -information in that region. This is a restriction we are working to -loosen. If you need to circumvent this, you can redefine the region -labeled ``global`` to meet your needs. - - -Other options -------------- - -* ``--filter``, Sometimes a model has output from several runs or - experiments included in the same location. This is frequently - indicated by some string in the filename, such as ``r0i0p0``. This - option may be used to require that files contain a specific string - to be considered in the list of variables models provide. -* ``--skip_plots``, The plotting phase of ILAMB is expensive. It - takes a long time to generate all the thousands of graphics that get - produced. It may be that you are running ILAMB for the summary - graphic/information only. In this case you can run with this option - to speed up the run. -* ``--build_dir``, The default location for generating the ILAMB - output is a ``_build`` directory placed in the directory from which - you ran ``ilamb-run``. While fine for everyday use, you may wish to - control the location of this directory. -* ``--disable_logging``, ILAMB uses an MPI logger to write exceptions - and progress to a log file in a thread-safe fashion. This helps - tremendously when tracking down user errors. However, we have found - that on some systems (e.g. geyser at NCAR) this causes ``ilamb-run`` to - lock for reasons we do not yet understand. Disabling the logging - seems to circumvent the issue. If you find that ``ilamb-run`` makes - no progress when running in parallel, you might try this option. -* ``--quiet``, By default, ILAMB spits out progress information to - the screen. If you wish to suppress this information, run with this - option. - - diff --git a/ilamb/ilamb/doc/index.rst b/ilamb/ilamb/doc/index.rst deleted file mode 100644 index 3ad2fddb..00000000 --- a/ilamb/ilamb/doc/index.rst +++ /dev/null @@ -1,39 +0,0 @@ -The ILAMB Benchmarking System -============================= - -The International Land Model Benchmarking (ILAMB_) project is a -model-data intercomparison and integration project designed to improve -the performance of land models and, in parallel, improve the design of -new measurement campaigns to reduce uncertainties associated with key -land surface processes. Building upon past model evaluation studies, -the goals of ILAMB are to: - -* develop internationally accepted benchmarks for land model - performance, -* promote the use of these benchmarks by the international community - for model intercomparison, -* strengthen linkages between experimental, remote sensing, and - climate modeling communities in the design of new model tests and - new measurement programs, and -* support the design and development of a new, open source, - benchmarking software system for use by the international community. - -It is the last of these goals with which this page is concerned. We have -developed a python-based generic benchmarking system, for which the -source code may be found on bitbucket_. The development is open and -patches are welcome. 
The main output of our package comes in the form -of a HTML site_ which can be navigated to explore and understand the -results. - -Documentation -------------- - -.. toctree:: - :maxdepth: 2 - - tutorial - packageapi - -.. _ILAMB: http://www.ilamb.org/ -.. _bitbucket: https://bitbucket.org/ncollier/ilamb -.. _site: http://www.climatemodeling.org/~nate/ILAMB/index.html diff --git a/ilamb/ilamb/doc/install.rst b/ilamb/ilamb/doc/install.rst deleted file mode 100644 index 84a85043..00000000 --- a/ilamb/ilamb/doc/install.rst +++ /dev/null @@ -1,278 +0,0 @@ -Installation -============ - -The ILAMB benchmarking software is written in python 2.7x and depends -on a few packages which extend the language's usefulness in scientific -applications. The easiest way to install the ILAMB package and its -dependencies is to get them from the Python Package Index (pypi_) using -pip_. To do so, type:: - - pip install ILAMB --user - -at the commandline and pip_ will install most everything -automatically. Please note that I have post-pended a ``--user`` flag -to the command. This is not strictly necessary yet recommended as it -will cause the packages to be installed to a *local* directory in -place of the *system* directory. This allows packages to be installed -without administrator privileges, and leaves your system installation -untouched, which may be important if you need to revert to a previous -state. You should see that a number of packages in addition to ILAMB -had their versions checked or were upgraded/installed as needed. These -include: - -* numpy_, the fundamental package for scientific computing with python -* matplotlib_, a 2D plotting library which produces publication quality figures -* netCDF4_, a python/numpy interface to the netCDF C library (you must have the C library installed) -* sympy_, a python library for symbolic mathematics -* mpi4py_, a python wrapper around the MPI library (you must have a MPI implementation installed) -* cf_units_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) - -I have designated that a few of these dependencies are python -interfaces to C libraries and so the library must also be installed -separately. See the individual package websites for more -details. Ideally, pip_ would be able to install all our dependencies -automatically. - -Unfortunately, one of our dependencies must be installed -manually. Despite being listed in the Python Package Index, basemap_ -cannot be installed with pip_. The meta information is listed, but the -package source is too large to be hosted and so installation fails. We -will need to install basemap_ from the source hosted on github_. This -is a useful process to understand as any python package can be -installed in this way. First, clone the git repository:: - - git clone https://github.com/matplotlib/basemap.git - -This will take some time as the repository is large (>100Mb) due to it -including some high resolution map data used in plotting. Enter into -the cloned directory and take note of a file called ``setup.py``. All -python packages will contain a file called ``setup.py`` in the top -level directory. This is where a developer tells python how to install -the package. Now we type:: - - python setup.py install --user - -and the package should install. Hopefully in the future basemap_ will -improve their installation process in pypi_, but in the meantime it -must be installed as we have detailed here. 
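Before testing ILAMB itself, it can be worth confirming that the manually installed basemap is importable from the same python you plan to use. The short check below is only a suggestion, not part of the original instructions, and assumes nothing beyond the packages discussed above.

.. code-block:: python

    import matplotlib
    from mpl_toolkits.basemap import Basemap

    print("matplotlib %s" % matplotlib.__version__)
    # building a trivial map exercises basemap's bundled data files as well
    m = Basemap(projection="robin", lon_0=0)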
- -You can test your installation by the following command:: - - python -c "import ILAMB; print ILAMB.__version__" - -If you get a numerical output, then the package has been successfully -installed. - -Now what? ---------- - -If you got the installation to work, then you should proceed to -working on the next tutorial. Before leaving this page, there are a -few extra steps we recommend you perform. If you installed ILAMB using -the ``--user`` option, the executable script ``ilamb-run`` will be -placed inside ``${HOME}/.local/bin``. You may need to append this -location to your ``PATH`` environment variable:: - - export PATH=${PATH}:${HOME}/.local/bin - -assuming you are using a ``bash`` environment. This will make the -``ilamb-run`` script executable from any directory. Also, if you are -connecting to a machine remotely in order to run ILAMB, you may wish -to change the matplotlib_ backend to something that does not generate -interactive graphics:: - - export MPLBACKEND=Agg - -This will allow ILAMB to run without needing to connect with the -``-X`` option. - -What can go wrong? ------------------- - -In an ideal world, this will work just as I have typed it to -you. However, if you are here, something has happened and you need -help. Installing software is frequently all about making sure things -get put in the correct place. You may be unaware of it, but you may -have several versions of python floating around your machine. The pip_ -software we used to install packages needs to match the version of -python that we are using. Try typing:: - - pip --version - which python - python --version - -where you should see something like:: - - pip 9.0.1 from /usr/local/lib/python2.7/site-packages (python 2.7) - /usr/local/bin/python - Python 2.7.13 - -Notice that in my case the pip_ I am using matches the version and -location of the python. This is important as pip_ will install -packages into the locations which my python will find. If your pip_ -is, say, for python 3 but you are using python 2.7 then you will -install packages successfully, but they will seem to not be available -to you. The same thing can happen if you have the right version of -python, but it is installed in some other location. - -Now we provide some interpretation of the possible output you got from -the test. If you ran:: - - python -c "import ILAMB; print ILAMB.__version__" - -and you see something like:: - - Traceback (most recent call last): - File "<string>", line 1, in <module> - ImportError: No module named ILAMB - -Then the package did not correctly install and you need to look at the -screen output from the install process to see what went wrong. You may -also have observed an import error of a different sort. When you -import the ILAMB package, we check the version of all the packages on -which we depend. You could see an error text like the following:: - - Traceback (most recent call last): - File "<string>", line 1, in <module> - File "/usr/local/lib/python2.7/site-packages/ILAMB/__init__.py", line 29, in <module> - (key,__version__,key,requires[key],pkg.__version__)) - ImportError: Bad numpy version: ILAMB 0.1 requires numpy >= 1.9.2 got 1.7 - -This means that while the ``numpy`` package is installed on your -system, its version is too old and you need to use pip_ to upgrade it -to at least the version listed. 
You may also see a message like the -following:: - - Traceback (most recent call last): - File "<string>", line 1, in <module> - File "/usr/local/lib/python2.7/site-packages/ILAMB/__init__.py", line 25, in <module> - pkg = __import__(key) - ImportError: No module named numpy - -This means that we require the ``numpy`` package but you do not have -it installed at all. This should not happen, but if it does, use pip_ -to resolve this problem. It is possible that despite a seemingly -smooth installation of basemap_, ILAMB complains about there not being -a module called basemap:: - - Traceback (most recent call last): - File "<string>", line 1, in <module> - File "/usr/local/lib/python2.7/site-packages/ILAMB/__init__.py", line 24, in <module> - pkg = __import__(key, globals(), locals(), [froms[key]]) - ImportError: No module named basemap - -Basemap is a little trickier than other python packages because it is -a *plugin* to the matplotlib package. My recommendation if you are -seeing this message is to install matplotlib in a local location and -upgrade it to the most up to date version:: - - pip install matplotlib --user --upgrade - -and then install basemap also using the ``--user`` option. This should -ensure that matplotlib toolkits find the basemap extension. - -Institutional machines ----------------------- - -While ILAMB is portable and runs on your laptop or workstation, you -may be working remotely on an institutional machine where you have -modeling output results. Many times these machines already have our -dependencies installed and we only need to load them using -environment modules. See your computing center usage tutorials for -more information on how these work. Typically, you can search for -available software by:: - - module avail search_term - -for example. And then it is loaded by:: - - module load software_name - -In an effort to make it simpler for users to get ILAMB running, we are -listing installation instructions here for a number of machines with -which we have experience. In each case, we have tried to start with -only the default software enabled. Your mileage may vary as the -software stacks at these centers frequently change. - -It is relevant to note that ILAMB uses MPI to parallelize the -benchmarking process. Thus MPI is called even if you are running on -just one process. Because of this, many if not all institutional -machines will then require you to launch a job through a submission -script. See your computing center for details. - -Edison @ NERSC -~~~~~~~~~~~~~~ - -.. code-block:: bash - - module load python - module load numpy - module load matplotlib - module load basemap - module load mpi4py - module load netcdf - module load netcdf4-python - module load udunits - pip install ILAMB --user - export PATH=${PATH}:${HOME}/.local/edison/2.7.9/bin/ - -The matplotlib on Edison is pretty old and control of the backend is -not possible using the ``MPLBACKEND`` environment variable. If you -want to run without needing to connect with the ``-X`` option, you -will need to change the backend through the ``matplotlibrc`` -file. First, copy this file from the system level, into your local -configure directory:: - - cp /usr/common/software/python/matplotlib/1.4.3/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-linux-x86_64.egg/matplotlib/mpl-data/matplotlibrc ${HOME}/.config/matplotlib/ - -Next open the local copy of the file with an editor and search for -``backend``, changing the value to the right of the colon to ``Agg``. - -Rhea @ OLCF -~~~~~~~~~~~ - -.. 
code-block:: bash - - module rm PE-intel - module load PE-gnu - module load netcdf - module load udunits - module load geos - module load python - module load python_setuptools - module load python_pip - module load python_numpy - module load python_matplotlib - module load python_matplotlib_basemap_toolkit - module load python_netcdf4 - module load python_mpi4py - pip install ILAMB --user - export PATH=${PATH}:${HOME}/.local/bin/ - # The udunits module file should do this but doesn't - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/rhea/udunits/2.1.24/rhel6.6_gnu4.4.7/lib/ - -The matplotlib on Rhea is pretty old and control of the backend is -not possible using the ``MPLBACKEND`` environment variable. If you -want to run without needing to connect with the ``-X`` option, you -will need to change the backend through the ``matplotlibrc`` -file. First, copy this file from the system level, into your local -configure directory:: - - cp /sw/rhea/python_matplotlib/1.4.3/python2.7.9_numpy1.9.2_gnu4.8.2/lib64/python2.7/site-packages/matplotlib-1.4.3-py2.7-linux-x86_64.egg/matplotlib/mpl-data/matplotlibrc ${HOME}/.config/matplotlib/ - -Next open the local copy of the file with a editor and search for -``backend`` changing the value to the right of the colon to ``Agg``. - - - -.. _pypi: https://pypi.python.org/pypi -.. _pip: https://pip.pypa.io/en/stable/ -.. _repository: https://bitbucket.org/ncollier/ilamb -.. _numpy: https://www.numpy.org/ -.. _matplotlib: https://matplotlib.org/ -.. _netCDF4: https://github.com/Unidata/netcdf4-python -.. _cf_units: https://github.com/SciTools/cf-units -.. _basemap: https://github.com/matplotlib/basemap -.. _sympy: https://www.sympy.org/ -.. _mpi4py: https://pythonhosted.org/mpi4py/ -.. _github: https://github.com diff --git a/ilamb/ilamb/doc/overview.rst b/ilamb/ilamb/doc/overview.rst deleted file mode 100644 index cb913503..00000000 --- a/ilamb/ilamb/doc/overview.rst +++ /dev/null @@ -1,203 +0,0 @@ -Package Overview -================ - -This tutorial is meant to provide some basic understanding of how the -ILAMB python package works and is organized. The level of information -communicated is aimed at a developer who wants to implement his own -benchmark into the system and needs to understand how to go about -doing so. We will start here with a few simple examples which -demonstrate functionality, and layer in complexity in subsequent -tutorials. - -The ILAMB python package consists of four main objects: ``Variable``, -``ModelResult``, ``Confrontation``, and ``Scoreboard``. We will -discuss the first three in this tutorial. - -The Variable Object -------------------- - -The ``Variable`` object is the basic building block of the ILAMB -package. It keeps track of dimensions as the netCDF variables do, but -also provides data-aware analysis routines which operate on the data -in an intelligent manner. For example, consider the following variable -we can create from the data used in a `previous <./first_steps.html>`_ -tutorial:: - - from ILAMB.Variable import Variable - import os - v = Variable(filename = os.environ["ILAMB_ROOT"] + "/MODELS/CLM40cn/rsus/rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc", - variable_name = "rsus") - -The first two lines here import the functionality we need. The first -imports the ``Variable`` object from the ILAMB package and the second -imports a standard python package which allows us to interact with the -operating system. 
We need this package to gain access to the -``ILAMB_ROOT`` environment variable explained in the `First Steps -<./first_steps.html>`_ tutorial. Then we create a variable object by -specifying the filename as well as the name of the variable which we want -to extract from inside. We can then print this variable:: - - print v - -which will display the following information to the screen:: - - Variable: rsus - -------------- - unit: W m-2 - isTemporal: True (1932) - isSpatial: True (192,288) - nDatasites: N/A - dataShape: (1932, 192, 288) - dataMax: 4.028994e+02 - dataMin: 0.000000e+00 - dataMean: 6.153053e+01 - -The ``Variable`` object understands the dimensionality of the data as -well as its unit and then provides analysis routines which operate -intelligently depending on the type of data present. So for example, -we can find the mean value over the time period of the data by:: - - print v.integrateInTime(mean=True) - -which will display:: - - Variable: rsus_integrated_over_time_and_divided_by_time_period - -------------------------------------------------------------- - unit: W m-2 - isTemporal: False - isSpatial: True (192,288) - nDatasites: N/A - dataShape: (192, 288) - dataMax: 1.386898e+02 - dataMin: 9.787394e+00 - dataMean: 6.148656e+01 - -The returned value is another ``Variable`` object, which now has lost -its temporal dimension because this was integrated out. It represents -the average in time at each grid cell in the original data. The -``Variable`` object has a lot of functionality and will be expanded to -meet the needs of developers. For a more complete explanation of the -interface, consult the `documentation -<_generated/ILAMB.Variable.Variable.html>`_. However, the point of -this tutorial is that we use the ``Variable`` object to perform -analysis operations in a uniform and flexible manner. Its full -functionality will be covered in more detail in a future tutorial. - -The ModelResult Object ----------------------- - -The ``ModelResult`` object is meant to make getting a model's -variables easy. We anticipate that researchers will have placed all a -model run's results in a single directory bearing the model name as -well as perhaps version, or forcing. To create this object, we simply -point to the top-level directory where the results are contained:: - - from ILAMB.ModelResult import ModelResult - m = ModelResult(os.environ["ILAMB_ROOT"] + "/MODELS/CLM40cn", - modelname = "CLM40cn") - -When we instantiate the model result, internally we search for all -variables found in all netCDF files contained underneath this -top-level directory. This makes extracting variables simple. We can -extract the same variable as above, but in a much simpler manner -once the model result has been defined:: - - v = m.extractTimeSeries("rsus") - print v - -yields the following screen output:: - - Variable: rsus - -------------- - unit: W m-2 - isTemporal: True (1932) - isSpatial: True (192,288) - nDatasites: N/A - dataShape: (1932, 192, 288) - dataMax: 4.028994e+02 - dataMin: 0.000000e+00 - dataMean: 6.153053e+01 - -In addition to making the acquisition of model data simpler, if land -fractions and areas are relevant (that is, the variable is spatial), -we will apply them to the variable automatically. The user is only -responsible for having the appropriate datafiles (``areacella`` and -``sftlf``) in the model's directory. Extracting the variables from the -``ModelResult`` object ensures that we handle model data -consistently. 
The ``ModelResult`` `interface -<_generated/ILAMB.ModelResult.ModelResult.html>`_ is much smaller, and -will be expanded in the future. - -The Confrontation Object ------------------------- - -The ``Confrontation`` object manages the benchmark dataset, the -extraction of the data from the model, the analysis performed, as well -as the plotting and generating of results. As a developer, you will be -writing your own ``Confrontation`` objects so it is important to -understand what they are and how they work. First, we will initialize -one to help illustrate their functionality:: - - from ILAMB.Confrontation import Confrontation - c = Confrontation(source = os.environ["ILAMB_ROOT"] + "/DATA/rsus/CERES/rsus_0.5x0.5.nc", - name = "CERES", - variable = "rsus") - -As before, we specify the source data relative to the ``ILAMB_ROOT`` -variable. We also have given the confrontation a name and a variable -to expect. There are two main functions to highlight at this -point. The first has to do with preparing data for comparison:: - - obs,mod = c.stageData(m) - -The ``stageData`` functionality returns both the observational and -model datasets as ``Variable`` objects and in a form in which they are -comparable. For example, if we again print ``mod`` here, which is -analogous to ``v`` above, we see:: - - Variable: rsus - -------------- - unit: W/m2 - isTemporal: True (131) - isSpatial: True (192,288) - nDatasites: N/A - dataShape: (131, 192, 288) - dataMax: 4.028824e+02 - dataMin: 0.000000e+00 - dataMean: 6.035579e+01 - -However, the temporal dimension has been greatly reduced (from 1932 -entries down to 131). This is because the observational dataset is -contemporary and the model starts back in 1850. In addition to -clipping the data, we also convert units if appropriate. - -The second main function of the ``Confrontation`` is to perform the -desired analysis. This happens in the ``confront`` functionality:: - - c.confront(m) - -Where ``m`` is the ``ModelResult`` being passed in. This routine calls -``stageData`` internally, and then performs the desired analysis. The -function does not return anything, but generates an analysis file -which contains the results of the analysis. In this case, you will -find two netCDF4 files in your directory: ``CERES_Benchmark.nc`` and -``CERES_CLM40cn.nc``. You can use ``ncdump`` or ``ncview`` (from -NetCDF Tools) to examine the contents of these files. - -The ``Confrontation`` also handles the plotting and generation of HTML -output pages, but this is a more advanced aspect of the object, -detailed in its interface, shown `here -<_generated/ILAMB.Confrontation.Confrontation.html>`_. - -Summary -------- - -While there is much more to learn in understanding the ILAMB python -package, these are the basic objects and concepts you will need to -grasp to implement new benchmarks and analysis. The basic idea is that -we have encapsulated the notion of benchmark datasets and their -accompanying analysis into a ``Confrontation`` class which operates on -the ``ModelResult`` represented as a ``Variable``. What we have done -here manually is part of what happens inside of the ``ilamb-run`` -script, which we executed in previous tutorials. - diff --git a/ilamb/ilamb/doc/packageapi.rst b/ilamb/ilamb/doc/packageapi.rst deleted file mode 100644 index ac2996a1..00000000 --- a/ilamb/ilamb/doc/packageapi.rst +++ /dev/null @@ -1,132 +0,0 @@ -Package Contents -================ - -We have written this python package with the intent that it be easy to -use on a variety of levels. 
For the researcher who just wants to setup -basic model-benchmark confrontations using our mean-state analysis, -they may never need to program in python. For those who wish to -perform more complicated comparisons or pose their own analysis, we -present the package contents with some underlying philosophy -concerning the purpose of each object. - -Variable --------- - -This class encapsulates data defined in space/time with common -analysis capabilities. You can think of it as a netCDF variable with -analysis routines that are aware of the spatial/temporal nature of the -data. It is the basic building block on which the analysis portion of -the package is built. - -.. currentmodule:: ILAMB.Variable -.. autosummary:: - :toctree: _generated - :nosignatures: - :template: class.rst - - Variable - -ModelResults ------------- - -This model result class aims to handle model queries efficiently and -uniformly without requiring the user to interface with the source -netCDF files themselves. - -.. currentmodule:: ILAMB.ModelResult -.. autosummary:: - :toctree: _generated - :nosignatures: - :template: class.rst - - ModelResult - -Confrontations --------------- - -In the ILAMB parlance, a *confrontation* is a observational benchmark -dataset and its accompanying analysis. Thus we have implemented the -``Confrontation`` base class which will automatically handle the -querying of data from the model and perform our mean-state -analysis. In anticipation that users will want to write their own -analysis, or have benchmark datasets which compare to a non-algebraic -combination of model inputs, we support confrontation classes which -derive from the base class. The other classes listed here are examples -of how this may be accomplished. - -.. currentmodule:: ILAMB.Confrontation -.. autosummary:: - :toctree: _generated - :nosignatures: - :template: class.rst - - Confrontation - -.. currentmodule:: ILAMB.ConfNBP -.. autosummary:: - :toctree: _generated - :nosignatures: - :template: class.rst - - ConfNBP - -Regions -------- - -This class unifies treatment of regions in ILAMB by allowing regions -to be defined by latitude and longitude bounds as well by netCDF4 -files containing integer indices. - -.. currentmodule:: ILAMB.Regions -.. autosummary:: - :toctree: _generated - :nosignatures: - :template: class.rst - - Regions - -ilamblib --------- - -This module collects many routines which may be helpful but do not -belong with any of the above objects. - -.. currentmodule:: ILAMB.ilamblib -.. autosummary:: - :toctree: _generated - - GenerateDistinctColors - ClipTime - ConvertCalendar - ComposeSpatialGrids - CellAreas - GlobalLatLonGrid - NearestNeighborInterpolation - TrueError - SympifyWithArgsUnits - FromNetCDF4 - CombineVariables - Score - ScoreSeasonalCycle - MakeComparable - AnalysisMeanState - AnalysisRelationship - -Post ----- - -Finally we provide a module with many ways of presenting these -results. While we want to keep data formats open such that researchers -may use plotting code of their own in their own language, we also want -to provide a robust set of tools for generating different views and -organizations of the confrontation results. - -.. currentmodule:: ILAMB.Post -.. 
autosummary:: - :toctree: _generated - - ColorBar - TaylorDiagram - WhittakerDiagram - RegisterCustomColormaps - BenchmarkSummaryFigure diff --git a/ilamb/ilamb/doc/tutorial.rst b/ilamb/ilamb/doc/tutorial.rst deleted file mode 100644 index c3c4f5b7..00000000 --- a/ilamb/ilamb/doc/tutorial.rst +++ /dev/null @@ -1,39 +0,0 @@ -Tutorials -========= - -Beginner Level --------------- - -The following tutorials are aimed at those who want to use the package -as it is. Perhaps you have an additional dataset to add or your own -model results to evaluate. This is the place to start learning what -you need to know! - -.. toctree:: - :maxdepth: 1 - - install - first_steps - add_model - add_data - format_data - ilamb_fetch - ilamb_run - ilamb_doctor - custom_regions - - -Developer Level ---------------- - -These tutorials start to explain the package functionality in more -depth. The level assumes familiarity with python as well as all the -beginner level tutorials. These tutorials are if you want to develop -your own benchmarks or metrics and contribute to the ILAMB python -package! - -.. toctree:: - :maxdepth: 1 - - overview - confront diff --git a/ilamb/ilamb/setup.py b/ilamb/ilamb/setup.py deleted file mode 100644 index b189267f..00000000 --- a/ilamb/ilamb/setup.py +++ /dev/null @@ -1,109 +0,0 @@ -#!/usr/bin/env python -from setuptools import setup -from codecs import open -import subprocess -import os - -VERSION = '2.3' - -def git_version(): - """ - Return the sha1 of local git HEAD as a string. - """ - def _minimal_ext_cmd(cmd): - # construct minimal environment - env = {} - for k in ['SYSTEMROOT', 'PATH', 'PYTHONPATH']: - v = os.environ.get(k) - if v is not None: - env[k] = v - # LANGUAGE is used on win32 - env['LANGUAGE'] = 'C' - env['LANG'] = 'C' - env['LC_ALL'] = 'C' - out = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - env=env - ).communicate()[0] - return out - try: - out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) - git_revision = out.strip().decode('ascii') - except OSError: - git_revision = "unknown-git" - return git_revision - -def write_text(filename, text): - try: - with open(filename, 'w') as a: - a.write(text) - except Exception as e: - print(e) - -def write_version_py(filename=os.path.join('src/ILAMB', 'generated_version.py')): - cnt = """ -# THIS FILE IS GENERATED FROM ILAMB SETUP.PY -short_version = '%(version)s' -version = '%(version)s' -git_revision = '%(git_revision)s' -full_version = '%(version)s (%%(git_revision)s)' %% { - 'git_revision': git_revision} -release = %(isrelease)s -if not release: - version = full_version -""" - FULL_VERSION = VERSION - if os.path.isdir('.git'): - GIT_REVISION = git_version() - ISRELEASED = False - else: - GIT_REVISION = "RELEASE" - ISRELEASED = True - - FULL_VERSION += '.dev-' + GIT_REVISION - text = cnt % {'version': VERSION, - 'full_version': FULL_VERSION, - 'git_revision': GIT_REVISION, - 'isrelease': str(ISRELEASED)} - write_text(filename, text) - - -here = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: - long_description = f.read() - -write_version_py() -setup( - name='ILAMB', - version=VERSION, - description='The International Land Model Benchmarking Package', - long_description=long_description, - url='https://bitbucket.org/ncollier/ilamb', - author='Nathan Collier', - author_email='nathaniel.collier@gmail.com', - #license='MIT', - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Science/Research', - 'Topic :: 
Scientific/Engineering', - #'License :: OSI Approved :: MIT License', - 'Operating System :: MacOS', - 'Operating System :: POSIX', - 'Operating System :: POSIX :: Linux', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - ], - keywords=['benchmarking','earth system modeling','climate modeling','model intercomparison'], - packages=['ILAMB'], - package_dir={'ILAMB' : 'src/ILAMB'}, - scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor','bin/ilamb-table'], - install_requires=['numpy>=1.11.0', - 'matplotlib>=1.4.3', - #'basemap>=1.0.7', # basemap is in pypi but broken, need to manually install - 'netCDF4>=1.1.4', - 'cf_units>=2.0.0', - 'sympy>=0.7.6', - 'mpi4py>=1.3.1', - 'scipy>=0.9.0'] -) diff --git a/ilamb/ilamb/src/ILAMB/ConfDiurnal.py b/ilamb/ilamb/src/ILAMB/ConfDiurnal.py deleted file mode 100644 index de040f2f..00000000 --- a/ilamb/ilamb/src/ILAMB/ConfDiurnal.py +++ /dev/null @@ -1,199 +0,0 @@ -from ILAMB.Confrontation import Confrontation -from ILAMB.Confrontation import getVariableList -import matplotlib.pyplot as plt -import ILAMB.Post as post -from scipy.interpolate import CubicSpline -from mpl_toolkits.basemap import Basemap -from ILAMB.Variable import Variable -from netCDF4 import Dataset -import ILAMB.ilamblib as il -import numpy as np -import os,glob - -def DiurnalReshape(var): - dt = (var.time_bnds[:,1]-var.time_bnds[:,0]).mean() - spd = int(round(1./dt)) - begin = np.argmin(var.time[:(spd-1)]%spd) - end = begin+int(var.time[begin:].size/float(spd))*spd - shp = (-1,spd) + var.data.shape[1:] - cycle = var.data[begin:end].reshape(shp) - tbnd = var.time_bnds[begin:end,:].reshape((-1,spd,2)) % 1 - tbnd = tbnd[0,...] - tbnd[-1,1] = 1. - t = tbnd.mean(axis=1) - return cycle,t,tbnd - -class ConfDiurnal(Confrontation): - """A confrontation for examining the diurnal - """ - def __init__(self,**keywords): - - # Calls the regular constructor - super(ConfDiurnal,self).__init__(**keywords) - - # Setup a html layout for generating web views of the results - pages = [] - - # Mean State page - pages.append(post.HtmlPage("MeanState","Mean State")) - pages[-1].setHeader("CNAME / RNAME / MNAME") - pages[-1].setSections(["Diurnal cycle"]) - pages.append(post.HtmlAllModelsPage("AllModels","All Models")) - pages[-1].setHeader("CNAME / RNAME") - pages[-1].setSections([]) - pages[-1].setRegions(self.regions) - pages.append(post.HtmlPage("DataInformation","Data Information")) - pages[-1].setSections([]) - pages[-1].text = "\n" - with Dataset(self.source) as dset: - for attr in dset.ncattrs(): - pages[-1].text += "

      %s: %s

    \n" % (attr,dset.getncattr(attr).encode('ascii','ignore')) - self.layout = post.HtmlLayout(pages,self.longname) - - def stageData(self,m): - - obs = Variable(filename = self.source, - variable_name = self.variable, - alternate_vars = self.alternate_vars) - if obs.time is None: raise il.NotTemporalVariable() - self.pruneRegions(obs) - - # Try to extract a commensurate quantity from the model - mod = m.extractTimeSeries(self.variable, - alt_vars = self.alternate_vars, - expression = self.derived, - initial_time = obs.time_bnds[ 0,0], - final_time = obs.time_bnds[-1,1], - lats = None if obs.spatial else obs.lat, - lons = None if obs.spatial else obs.lon).convert(obs.unit) - return obs,mod - - def confront(self,m): - - # get the HTML page - page = [page for page in self.layout.pages if "MeanState" in page.name][0] - - # Grab the data - obs,mod = self.stageData(m) - odata,ot,otb = DiurnalReshape(obs) - mdata,mt,mtb = DiurnalReshape(mod) - - n = len(self.lbls) - obs_amp = np.zeros(n) - mod_amp = np.zeros(n) - amp_score = np.zeros(n) - obs_phase = np.zeros(n) - mod_phase = np.zeros(n) - phase_score = np.zeros(n) - for site in range(n): - - # Site name - lbl = self.lbls[site] - skip = False - - # Observational diurnal cycle - tobs = ot + obs.lon[site]/360 - vobs = odata[...,site] - vobs = np.roll(vobs,-tobs.searchsorted(0),axis=1) - tobs = np.roll(tobs,-tobs.searchsorted(0)) - tobs += (tobs<0) - aobs = (vobs.max(axis=1)-vobs.min(axis=1)).mean() - vobs = vobs.mean(axis=0) - if vobs.size == vobs.mask.sum(): skip = True - if not skip: - acyc = CubicSpline(np.hstack([tobs,tobs[0]+1.]), - np.hstack([vobs,vobs[0] ]), - bc_type="periodic") - troot = acyc.derivative().solve() - troot = troot[(troot>=0)*(troot<=1.)] - otmx = troot[acyc(troot).argmax()] - - # Model diurnal cycle - tmod = mt + mod.lon[site]/360 - vmod = mdata[...,site] - vmod = np.roll(vmod,-tmod.searchsorted(0),axis=1) - tmod = np.roll(tmod,-tmod.searchsorted(0)) - tmod += (tmod<0) - amod = (vmod.max(axis=1)-vmod.min(axis=1)).mean() - vmod = vmod.mean(axis=0) - mcyc = CubicSpline(np.hstack([tmod,tmod[0]+1.]), - np.hstack([vmod,vmod[0] ]), - bc_type="periodic") - troot = mcyc.derivative().solve() - troot = troot[(troot>=0)*(troot<=1.)] - mtmx = troot[mcyc(troot).argmax()] - - # Scalars and scores - if skip: - obs_amp [site] = np.nan - obs_phase [site] = np.nan - amp_score [site] = np.nan - phase_score[site] = np.nan - else: - obs_amp [site] = aobs - obs_phase [site] = otmx - amp_score [site] = np.exp(-np.abs(amod-aobs)/aobs) - phase_score[site] = 1-np.abs(mtmx-otmx)/0.5 - mod_amp [site] = amod - mod_phase [site] = mtmx - - # Plot - ts = np.linspace(0,1,100) - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - if not skip: - ax.plot(tobs,vobs,'o',mew=0,markersize=3,color='k') - ax.plot(ts,acyc(ts),'-',color='k') - ax.plot(otmx,acyc(otmx),'o',mew=0,markersize=5,color='k') - ax.plot(tmod,vmod,'o',mew=0,markersize=3,color=m.color) - ax.plot(ts,mcyc(ts),'-',color=m.color) - ax.plot(mtmx,mcyc(mtmx),'o',mew=0,markersize=5,color=m.color) - xt = np.arange(25)[::3] - xtl = ["%02d:00" % xx for xx in xt] - ax.set_xticks (xt/24.) 
- ax.set_xticklabels(xtl ) - ax.grid(True) - ax.set_xlabel("Mean solar time") - ax.set_ylabel("[%s]" % obs.unit) - plt.savefig(os.path.join(self.output_path,"%s_diurnal_%s.png" % (m.name,lbl))) - plt.close() - - obs_amp = np.ma.masked_invalid(obs_amp) - obs_phase = np.ma.masked_invalid(obs_phase) - amp_score = np.ma.masked_invalid(amp_score) - phase_score = np.ma.masked_invalid(phase_score) - - results = Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="w") - results.setncatts({"name" :m.name, "color":m.color}) - Variable(name="Amplitude global" ,unit=obs.unit,data= mod_amp .mean()).toNetCDF4(results,group="MeanState") - Variable(name="Max time global" ,unit="h" ,data=24*mod_phase.mean()).toNetCDF4(results,group="MeanState") - Variable(name="Amplitude Score global",unit="1" ,data= amp_score.mean()).toNetCDF4(results,group="MeanState") - Variable(name="Phase Score global" ,unit="1" ,data= phase_score.mean()).toNetCDF4(results,group="MeanState") - results.close() - if self.master: - results = Dataset(os.path.join(self.output_path,"%s_Benchmark.nc" % self.name),mode="w") - results.setncatts({"name" :"Benchmark", "color":np.asarray([0.5,0.5,0.5])}) - Variable(name="Amplitude global" ,unit=obs.unit,data= obs_amp .mean()).toNetCDF4(results,group="MeanState") - Variable(name="Max time global" ,unit="h" ,data=24*obs_phase.mean()).toNetCDF4(results,group="MeanState") - results.close() - - def modelPlots(self,m): - - bname = "%s/%s_Benchmark.nc" % (self.output_path,self.name) - fname = "%s/%s_%s.nc" % (self.output_path,self.name,m.name) - if not os.path.isfile(bname): return - if not os.path.isfile(fname): return - - # get the HTML page - page = [page for page in self.layout.pages if "MeanState" in page.name][0] - page.priority = ["Amplitude","Max","Min","Max time","Bias","RMSE","Shift","Score","Overall"] - - for site in range(len(self.lbls)): - - # Site name - lbl = self.lbls[site] - page.addFigure("Diurnal cycle", - lbl, - "MNAME_diurnal_%s.png" % lbl, - side = lbl, - legend = False) - diff --git a/ilamb/ilamb/src/ILAMB/ConfEvapFraction.py b/ilamb/ilamb/src/ILAMB/ConfEvapFraction.py deleted file mode 100644 index 46630614..00000000 --- a/ilamb/ilamb/src/ILAMB/ConfEvapFraction.py +++ /dev/null @@ -1,65 +0,0 @@ -from ILAMB.Confrontation import Confrontation -from mpl_toolkits.basemap import Basemap -from ILAMB.Variable import Variable -from netCDF4 import Dataset -import ILAMB.ilamblib as il -import numpy as np -import os - -class ConfEvapFraction(Confrontation): - - def stageData(self,m): - - energy_threshold = float(self.keywords.get("energy_threshold",20.)) - sh = Variable(filename = os.path.join(os.environ["ILAMB_ROOT"],"DATA/sh/GBAF/sh_0.5x0.5.nc"), - variable_name = "sh") - le = Variable(filename = os.path.join(os.environ["ILAMB_ROOT"],"DATA/le/GBAF/le_0.5x0.5.nc"), - variable_name = "le") - obs = Variable(name = self.variable, - unit = "1", - data = np.ma.masked_array(le.data/(le.data+sh.data), - mask=((le.data<0)+ - (sh.data<0)+ - ((le.data+sh.data) 0: - has_std = True - sds = dset.groups["scalars"].variables[key[0]] - corr[region].append(sds.getncattr("R" )) - std [region].append(sds.getncattr("std")) - - if has_std: - - # Legends - def _alphabeticalBenchmarkFirst(key): - key = key[0].upper() - if key == "BENCHMARK": return 0 - return key - tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) - fig,ax = plt.subplots() - for model,color in tmp: - ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) - handles,labels = ax.get_legend_handles_labels() - 
plt.close() - ncol = np.ceil(float(len(models))/11.).astype(int) - fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) - ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) - ax.axis('off') - fig.savefig("%s/legend_spatial_variance.png" % self.output_path) - plt.close() - - - page.addFigure("Period mean at surface", - "spatial_variance", - "RNAME_spatial_variance.png", - side = "SPATIAL TAYLOR DIAGRAM", - legend = False) - page.addFigure("Period mean at surface", - "legend_spatial_variance", - "legend_spatial_variance.png", - side = "MODEL COLORS", - legend = False) - if "Benchmark" in models: colors.pop(models.index("Benchmark")) - for region in self.regions: - if not (std.has_key(region) and corr.has_key(region)): continue - if len(std[region]) != len(corr[region]): continue - if len(std[region]) == 0: continue - fig = plt.figure(figsize=(6.0,6.0)) - post.TaylorDiagram(np.asarray(std[region]),np.asarray(corr[region]),1.0,fig,colors) - fig.savefig("%s/%s_spatial_variance.png" % (self.output_path,region)) - plt.close() - - - def modelPlots(self,m): - - def _fheight(region): - if region in ["arctic","southern"]: return 6.8 - return 2.8 - - bname = "%s/%s_Benchmark.nc" % (self.output_path,self.name) - fname = "%s/%s_%s.nc" % (self.output_path,self.name,m.name) - if not os.path.isfile(bname): return - if not os.path.isfile(fname): return - - # get the HTML page - page = [page for page in self.layout.pages if "MeanState" in page.name][0] - - with Dataset(fname) as dataset: - group = dataset.groups["MeanState"] - variables = getVariableList(group) - color = dataset.getncattr("color") - - vname = "timeint_surface_%s" % self.variable - if vname in variables: - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - page.addFigure("Period mean at surface", - "timeint", - "MNAME_RNAME_timeint.png", - side = "MODEL SURFACE MEAN", - legend = True) - for region in self.regions: - fig = plt.figure() - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - var.plot(ax, - region = region, - vmin = self.limits["timeint"]["min"], - vmax = self.limits["timeint"]["max"], - cmap = self.cmap, - land = 0.750, - water = 0.875) - fig.savefig("%s/%s_%s_timeint.png" % (self.output_path,m.name,region)) - plt.close() - - vname = "bias_surface_%s" % self.variable - if vname in variables: - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - page.addFigure("Period mean at surface", - "bias", - "MNAME_RNAME_bias.png", - side = "SURFACE MEAN BIAS", - legend = True) - for region in self.regions: - fig = plt.figure() - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - var.plot(ax, - region = region, - vmin = self.limits["bias"]["min"], - vmax = self.limits["bias"]["max"], - cmap = "seismic", - land = 0.750, - water = 0.875) - fig.savefig("%s/%s_%s_bias.png" % (self.output_path,m.name,region)) - plt.close() - - vname = "biasscore_surface_%s" % self.variable - if vname in variables: - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - page.addFigure("Period mean at surface", - "biasscore", - "MNAME_RNAME_biasscore.png", - side = "SURFACE MEAN BIAS SCORE", - legend = True) - for region in self.regions: - fig = plt.figure() - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - var.plot(ax, - region = region, - vmin = 0, - vmax = 1, - cmap = "RdYlGn", - land = 0.750, - water = 0.875) - fig.savefig("%s/%s_%s_biasscore.png" % (self.output_path,m.name,region)) - plt.close() - - vname = "rmse_surface_%s" % self.variable - if vname in variables: - var = 
Variable(filename=fname,variable_name=vname,groupname="MeanState") - page.addFigure("Period mean at surface", - "rmse", - "MNAME_RNAME_rmse.png", - side = "SURFACE MEAN RMSE", - legend = True) - for region in self.regions: - fig = plt.figure() - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - var.plot(ax, - region = region, - vmin = self.limits["rmse"]["min"], - vmax = self.limits["rmse"]["max"], - cmap = "YlOrRd", - land = 0.750, - water = 0.875) - fig.savefig("%s/%s_%s_rmse.png" % (self.output_path,m.name,region)) - plt.close() - - vname = "rmsescore_surface_%s" % self.variable - if vname in variables: - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - page.addFigure("Period mean at surface", - "rmsescore", - "MNAME_RNAME_rmsescore.png", - side = "SURFACE MEAN RMSE SCORE", - legend = True) - for region in self.regions: - fig = plt.figure() - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - var.plot(ax, - region = region, - vmin = 0, - vmax = 1, - cmap = "RdYlGn", - land = 0.750, - water = 0.875) - fig.savefig("%s/%s_%s_rmsescore.png" % (self.output_path,m.name,region)) - plt.close() - - for region in self.regions: - - vname = "timelonint_of_%s_over_%s" % (self.variable,region) - if vname in variables: - var = Variable(filename=fname,variable_name=vname,groupname="MeanState") - if region == "global": - page.addFigure("Mean regional depth profiles", - "timelonint", - "MNAME_RNAME_timelonint.png", - side = "MODEL DEPTH PROFILE", - legend = True, - longname = "Time/longitude averaged profile") - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - l = np.hstack([var.lat_bnds [:,0],var.lat_bnds [-1,1]]) - d = np.hstack([var.depth_bnds[:,0],var.depth_bnds[-1,1]]) - ind = np.all(var.data.mask,axis=0) - ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) - b = ind.min() - e = ind.max()+1 - ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e], - vmin = self.limits["timelonint"]["global"]["min"], - vmax = self.limits["timelonint"]["global"]["max"], - cmap = self.cmap) - ax.set_xlabel("latitude") - ax.set_ylim((d.max(),d.min())) - ax.set_ylabel("depth [m]") - fig.savefig("%s/%s_%s_timelonint.png" % (self.output_path,m.name,region)) - plt.close() - - if not self.master: return - - with Dataset(bname) as dataset: - group = dataset.groups["MeanState"] - variables = getVariableList(group) - color = dataset.getncattr("color") - - vname = "timeint_surface_%s" % self.variable - if vname in variables: - var = Variable(filename=bname,variable_name=vname,groupname="MeanState") - page.addFigure("Period mean at surface", - "benchmark_timeint", - "Benchmark_RNAME_timeint.png", - side = "BENCHMARK SURFACE MEAN", - legend = True) - for region in self.regions: - fig = plt.figure() - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - var.plot(ax, - region = region, - vmin = self.limits["timeint"]["min"], - vmax = self.limits["timeint"]["max"], - cmap = self.cmap, - land = 0.750, - water = 0.875) - fig.savefig("%s/Benchmark_%s_timeint.png" % (self.output_path,region)) - plt.close() - - for region in self.regions: - - vname = "timelonint_of_%s_over_%s" % (self.variable,region) - if vname in variables: - var = Variable(filename=bname,variable_name=vname,groupname="MeanState") - if region == "global": - page.addFigure("Mean regional depth profiles", - "benchmark_timelonint", - "Benchmark_RNAME_timelonint.png", - side = "BENCHMARK DEPTH PROFILE", - legend = True, - longname = "Time/longitude averaged profile") - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - l = np.hstack([var.lat_bnds 
[:,0],var.lat_bnds [-1,1]]) - d = np.hstack([var.depth_bnds[:,0],var.depth_bnds[-1,1]]) - ind = np.all(var.data.mask,axis=0) - ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) - b = ind.min() - e = ind.max()+1 - ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e], - vmin = self.limits["timelonint"]["global"]["min"], - vmax = self.limits["timelonint"]["global"]["max"], - cmap = self.cmap) - ax.set_xlabel("latitude") - ax.set_ylim((d.max(),d.min())) - ax.set_ylabel("depth [m]") - fig.savefig("%s/Benchmark_%s_timelonint.png" % (self.output_path,region)) - plt.close() - - def determinePlotLimits(self): - - # Pick limit type - max_str = "up99"; min_str = "dn99" - if self.keywords.get("limit_type","99per") == "minmax": - max_str = "max"; min_str = "min" - - # Determine the min/max of variables over all models - limits = {} - for fname in glob.glob("%s/*.nc" % self.output_path): - with Dataset(fname) as dataset: - if "MeanState" not in dataset.groups: continue - group = dataset.groups["MeanState"] - variables = [v for v in group.variables.keys() if (v not in group.dimensions.keys() and - "_bnds" not in v and - group.variables[v][...].size > 1)] - for vname in variables: - var = group.variables[vname] - pname = vname.split("_")[ 0] - if "_over_" in vname: - region = vname.split("_over_")[-1] - if not limits.has_key(pname): limits[pname] = {} - if not limits[pname].has_key(region): - limits[pname][region] = {} - limits[pname][region]["min"] = +1e20 - limits[pname][region]["max"] = -1e20 - limits[pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) - limits[pname][region]["min"] = min(limits[pname][region]["min"],var.getncattr("min")) - limits[pname][region]["max"] = max(limits[pname][region]["max"],var.getncattr("max")) - else: - if not limits.has_key(pname): - limits[pname] = {} - limits[pname]["min"] = +1e20 - limits[pname]["max"] = -1e20 - limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) - limits[pname]["min"] = min(limits[pname]["min"],var.getncattr(min_str)) - limits[pname]["max"] = max(limits[pname]["max"],var.getncattr(max_str)) - - # Another pass to fix score limits - for pname in limits.keys(): - if "score" in pname: - if "min" in limits[pname].keys(): - limits[pname]["min"] = 0. - limits[pname]["max"] = 1. - else: - for region in limits[pname].keys(): - limits[pname][region]["min"] = 0. - limits[pname][region]["max"] = 1. - self.limits = limits - - # Second pass to plot legends - cmaps = {"bias":"seismic", - "rmse":"YlOrRd"} - for pname in limits.keys(): - - # Pick colormap - cmap = self.cmap - if cmaps.has_key(pname): - cmap = cmaps[pname] - elif "score" in pname: - cmap = "RdYlGn" - - # Need to symetrize? 
- if pname in ["bias"]: - vabs = max(abs(limits[pname]["min"]),abs(limits[pname]["min"])) - limits[pname]["min"] = -vabs - limits[pname]["max"] = vabs - - # Some plots need legends - if pname in ["timeint","bias","biasscore","rmse","rmsescore","timelonint"]: - if limits[pname].has_key("min"): - fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) - post.ColorBar(ax, - vmin = limits[pname]["min" ], - vmax = limits[pname]["max" ], - label = limits[pname]["unit"], - cmap = cmap) - fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) - plt.close() - else: - fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) - post.ColorBar(ax, - vmin = limits[pname]["global"]["min" ], - vmax = limits[pname]["global"]["max" ], - label = limits[pname]["global"]["unit"], - cmap = cmap) - fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) - plt.close() - - - diff --git a/ilamb/ilamb/src/ILAMB/ConfNBP.py b/ilamb/ilamb/src/ILAMB/ConfNBP.py deleted file mode 100644 index 97a3b47d..00000000 --- a/ilamb/ilamb/src/ILAMB/ConfNBP.py +++ /dev/null @@ -1,172 +0,0 @@ -from Confrontation import Confrontation -from Variable import Variable -from netCDF4 import Dataset -from copy import deepcopy -import ilamblib as il -import pylab as plt -import Post as post -import numpy as np -import os,glob - -class ConfNBP(Confrontation): - """A confrontation for examining the global net ecosystem carbon balance. - - """ - def __init__(self,**keywords): - - # Ugly, but this is how we call the Confrontation constructor - super(ConfNBP,self).__init__(**keywords) - - # Now we overwrite some things which are different here - self.regions = ['global'] - self.layout.regions = self.regions - - def stageData(self,m): - r"""Extracts model data and integrates it over the globe to match the confrontation dataset. - - Parameters - ---------- - m : ILAMB.ModelResult.ModelResult - the model result context - - Returns - ------- - obs : ILAMB.Variable.Variable - the variable context associated with the observational dataset - mod : ILAMB.Variable.Variable - the variable context associated with the model result - - """ - # get the observational data - obs = Variable(filename = self.source, - variable_name = self.variable, - alternate_vars = self.alternate_vars) - - # the model data needs integrated over the globe - mod = m.extractTimeSeries(self.variable, - alt_vars = self.alternate_vars) - mod = mod.integrateInSpace().convert(obs.unit) - tmin = mod.time_bnds[ 0,0] - tmax = mod.time_bnds[-1,1] - obs,mod = il.MakeComparable(obs,mod,clip_ref=True) - - # The obs can go beyond the information which models have - obs.trim(t=[tmin,tmax]) - mod.trim(t=[tmin,tmax]) - - # sign convention is backwards - obs.data *= -1. - mod.data *= -1. - - return obs,mod - - def confront(self,m): - r"""Confronts the input model with the observational data. - - Parameters - ---------- - m : ILAMB.ModelResult.ModelResult - the model results - - """ - # Grab the data - obs,mod = self.stageData(m) - obs_sum = obs.accumulateInTime().convert("Pg") - mod_sum = mod.accumulateInTime().convert("Pg") - - # End of period information - yf = np.round(obs.time_bnds[-1,1]/365.+1850.) 
- obs_end = Variable(name = "nbp(%4d)" % yf, - unit = obs_sum.unit, - data = obs_sum.data[-1]) - mod_end = Variable(name = "nbp(%4d)" % yf, - unit = mod_sum.unit, - data = mod_sum.data[-1]) - mod_diff = Variable(name = "diff(%4d)" % yf, - unit = mod_sum.unit, - data = mod_sum.data[-1]-obs_sum.data[-1]) - - # Difference score normlized by the uncertainty in the - # accumulation at the end of the time period. - normalizer = 0. - if "GCP" in self.longname: normalizer = 21.6*0.5 - if "Hoffman" in self.longname: normalizer = 84.6*0.5 - dscore = Variable(name = "Difference Score global" % yf, - unit = "1", - data = np.exp(-0.287*np.abs(mod_diff.data/normalizer))) - - # Temporal distribution - skip_taylor = self.keywords.get("skip_taylor",False) - if not skip_taylor: - np.seterr(over='ignore',under='ignore') - std0 = obs.data.std() - std = mod.data.std() - np.seterr(over='raise' ,under='raise' ) - R0 = 1.0 - R = obs.correlation(mod,ctype="temporal") - std /= std0 - score = Variable(name = "Temporal Distribution Score global", - unit = "1", - data = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0))) - - # Change names to make things easier to parse later - obs .name = "spaceint_of_nbp_over_global" - mod .name = "spaceint_of_nbp_over_global" - obs_sum .name = "accumulate_of_nbp_over_global" - mod_sum .name = "accumulate_of_nbp_over_global" - - # Dump to files - results = Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="w") - results.setncatts({"name" :m.name, "color":m.color}) - mod .toNetCDF4(results,group="MeanState") - mod_sum .toNetCDF4(results,group="MeanState") - mod_end .toNetCDF4(results,group="MeanState") - mod_diff .toNetCDF4(results,group="MeanState") - dscore .toNetCDF4(results,group="MeanState") - if not skip_taylor: - score .toNetCDF4(results,group="MeanState",attributes={"std":std,"R":R.data}) - results.close() - - if self.master: - results = Dataset(os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name)),mode="w") - results.setncatts({"name" :"Benchmark", "color":np.asarray([0.5,0.5,0.5])}) - obs .toNetCDF4(results,group="MeanState") - obs_sum .toNetCDF4(results,group="MeanState") - obs_end .toNetCDF4(results,group="MeanState") - results.close() - - - def compositePlots(self): - - # we want to run the original and also this additional plot - super(ConfNBP,self).compositePlots() - - # get the HTML page - page = [page for page in self.layout.pages if "MeanState" in page.name][0] - - colors = [] - corr = [] - std = [] - for fname in glob.glob(os.path.join(self.output_path,"*.nc")): - if "Benchmark" in fname: continue - dataset = Dataset(fname) - if "MeanState" not in dataset.groups: continue - dset = dataset.groups["MeanState"] - colors.append(dataset.getncattr("color")) - key = [v for v in dset.groups["scalars"].variables.keys() if ("Temporal Distribution Score" in v)] - if len(key) > 0: - sds = dset.groups["scalars"].variables[key[0]] - corr.append(sds.getncattr("R" )) - std .append(sds.getncattr("std")) - - # temporal distribution Taylor plot - if len(corr) > 0: - page.addFigure("Spatially integrated regional mean", - "temporal_variance", - "temporal_variance.png", - side = "TEMPORAL TAYLOR DIAGRAM", - legend = False) - fig = plt.figure(figsize=(6.0,6.0)) - post.TaylorDiagram(np.asarray(std),np.asarray(corr),1.0,fig,colors) - fig.savefig(os.path.join(self.output_path,"temporal_variance.png")) - plt.close() diff --git a/ilamb/ilamb/src/ILAMB/ConfPermafrost.py b/ilamb/ilamb/src/ILAMB/ConfPermafrost.py deleted file mode 100644 index 92c2ead4..00000000 --- 
a/ilamb/ilamb/src/ILAMB/ConfPermafrost.py +++ /dev/null @@ -1,223 +0,0 @@ -from Confrontation import Confrontation -from mpl_toolkits.basemap import Basemap -from Variable import Variable -from Post import ColorBar -import matplotlib.pyplot as plt -from netCDF4 import Dataset -import ilamblib as il -import numpy as np - -class ConfPermafrost(Confrontation): - - def __init__(self,**keywords): - - # Ugly, but this is how we call the Confrontation constructor - super(ConfPermafrost,self).__init__(**keywords) - - # Now we overwrite some things which are different here - self.layout - self.regions = ["global"] - self.layout.regions = self.regions - self.weight = { "Obs Score" : 1., - "Mod Score" : 1. } - for page in self.layout.pages: - page.setMetricPriority(["Total Area" , - "Overlap Area", - "Missed Area" , - "Excess Area" , - "Obs Score" , - "Mod Score" , - "Overall Score"]) - - def stageData(self,m): - - obs = Variable(filename = self.source, - variable_name = "permafrost_extent") - - # These parameters may be changed from the configure file - y0 = float(self.keywords.get("y0" ,1970.)) # [yr] beginning year to include in analysis - yf = float(self.keywords.get("yf" ,2000.)) # [yr] end year to include in analysis - dmax = float(self.keywords.get("dmax",3.5)) # [m] consider layers where depth in is the range [0,dmax] - Teps = float(self.keywords.get("Teps",273.15)) # [K] temperature below which we assume permafrost occurs - - t0 = (y0 -1850.)*365. - tf = (yf+1-1850.)*365. - mod = m.extractTimeSeries(self.variable, - initial_time = t0, - final_time = tf) - mod.trim(t = [t0 ,tf ], - lat = [obs.lat.min(),90 ], - d = [0 ,dmax]) - mod = mod.annualCycle() - Tmax = mod.data.max(axis=0) - table = np.zeros(Tmax.shape[-2:]) - table[...] = np.NAN - thaw = np.zeros(table.shape,dtype=bool) - for i in range(mod.depth_bnds.shape[0]-1,-1,-1): - thaw += (Tmax[i]>=Teps) - frozen = np.where((Tmax[i]0: - has_cycle = True - cycle[region].append(Variable(filename=fname,groupname="MeanState",variable_name=key[0])) - - if not std. has_key(region): std [region] = [] - if not corr. 
has_key(region): corr [region] = [] - - key = [] - if "scalars" in dset.groups: - key = [v for v in dset.groups["scalars"].variables.keys() if ("Spatial Distribution Score" in v and region in v)] - if len(key) > 0: - has_std = True - sds = dset.groups["scalars"].variables[key[0]] - corr[region].append(sds.getncattr("R" )) - std [region].append(sds.getncattr("std")) - - # composite annual cycle plot - if has_cycle and len(models) > 2: - page.addFigure("Spatially integrated regional mean", - "compcycle", - "RNAME_compcycle.png", - side = "ANNUAL CYCLE", - legend = False) - - for region in self.regions: - if not cycle.has_key(region): continue - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - for name,color,var in zip(models,colors,cycle[region]): - dy = 0.05*(self.limits["cycle"][region]["max"]-self.limits["cycle"][region]["min"]) - var.plot(ax,lw=2,color=color,label=name, - ticks = time_opts["cycle"]["ticks"], - ticklabels = time_opts["cycle"]["ticklabels"], - vmin = self.limits["cycle"][region]["min"]-dy, - vmax = self.limits["cycle"][region]["max"]+dy) - ylbl = time_opts["cycle"]["ylabel"] - if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) - ax.set_ylabel(ylbl) - fig.savefig(os.path.join(self.output_path,"%s_compcycle.png" % (region))) - plt.close() - - # plot legends with model colors (sorted with Benchmark data on top) - page.addFigure("Spatially integrated regional mean", - "legend_compcycle", - "legend_compcycle.png", - side = "MODEL COLORS", - legend = False) - def _alphabeticalBenchmarkFirst(key): - key = key[0].upper() - if key == "BENCHMARK": return 0 - return key - tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) - fig,ax = plt.subplots() - for model,color in tmp: - ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) - handles,labels = ax.get_legend_handles_labels() - plt.close() - - ncol = np.ceil(float(len(models))/11.).astype(int) - if ncol > 0: - fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) - ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) - ax.axis('off') - fig.savefig(os.path.join(self.output_path,"legend_compcycle.png")) - fig.savefig(os.path.join(self.output_path,"legend_spatial_variance.png")) - fig.savefig(os.path.join(self.output_path,"legend_temporal_variance.png")) - plt.close() - - # spatial distribution Taylor plot - if has_std: - page.addFigure("Temporally integrated period mean", - "spatial_variance", - "RNAME_spatial_variance.png", - side = "SPATIAL TAYLOR DIAGRAM", - legend = False) - page.addFigure("Temporally integrated period mean", - "legend_spatial_variance", - "legend_spatial_variance.png", - side = "MODEL COLORS", - legend = False) - if "Benchmark" in models: colors.pop(models.index("Benchmark")) - for region in self.regions: - if not (std.has_key(region) and corr.has_key(region)): continue - if len(std[region]) != len(corr[region]): continue - if len(std[region]) == 0: continue - fig = plt.figure(figsize=(6.0,6.0)) - post.TaylorDiagram(np.asarray(std[region]),np.asarray(corr[region]),1.0,fig,colors) - fig.savefig(os.path.join(self.output_path,"%s_spatial_variance.png" % region)) - plt.close() - - def modelPlots(self,m): - """For a given model, create the plots of the analysis results. - - This routine will extract plotting information out of the - netCDF file which results from the analysis and create - plots. Note that determinePlotLimits should be called before - this routine. 
- - """ - self._relationship(m) - bname = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name )) - fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) - if not os.path.isfile(bname): return - if not os.path.isfile(fname): return - - # get the HTML page - page = [page for page in self.layout.pages if "MeanState" in page.name][0] - - with Dataset(fname) as dataset: - group = dataset.groups["MeanState"] - variables = getVariableList(group) - color = dataset.getncattr("color") - for vname in variables: - - # is this a variable we need to plot? - pname = vname.split("_")[0] - if group.variables[vname][...].size <= 1: continue - var = Variable(filename=fname,groupname="MeanState",variable_name=vname) - - if (var.spatial or (var.ndata is not None)) and not var.temporal: - - # grab plotting options - if pname not in self.limits.keys(): continue - opts = space_opts[pname] - - # add to html layout - page.addFigure(opts["section"], - pname, - opts["pattern"], - side = opts["sidelbl"], - legend = opts["haslegend"]) - - # plot variable - for region in self.regions: - fig = plt.figure(figsize=(6.8,2.8)) - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - var.plot(ax, - region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) - plt.close() - - # Jumping through hoops to get the benchmark plotted and in the html output - if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"): - - opts = space_opts[pname] - - # add to html layout - page.addFigure(opts["section"], - "benchmark_%s" % pname, - opts["pattern"].replace("MNAME","Benchmark"), - side = opts["sidelbl"].replace("MODEL","BENCHMARK"), - legend = True) - - # plot variable - obs = Variable(filename=bname,groupname="MeanState",variable_name=vname) - for region in self.regions: - fig = plt.figure(figsize=(6.8,2.8)) - ax = fig.add_axes([0.06,0.025,0.88,0.965]) - obs.plot(ax, - region = region, - vmin = self.limits[pname]["min"], - vmax = self.limits[pname]["max"], - cmap = self.limits[pname]["cmap"]) - fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname))) - plt.close() - - if not (var.spatial or (var.ndata is not None)) and var.temporal: - - # grab the benchmark dataset to plot along with - obs = Variable(filename=bname,groupname="MeanState",variable_name=vname).convert(var.unit) - - # grab plotting options - opts = time_opts[pname] - - # add to html layout - page.addFigure(opts["section"], - pname, - opts["pattern"], - side = opts["sidelbl"], - legend = opts["haslegend"]) - - # plot variable - for region in self.regions: - if region not in vname: continue - fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - obs.plot(ax,lw=2,color='k',alpha=0.5) - var.plot(ax,lw=2,color=color,label=m.name, - ticks =opts["ticks"], - ticklabels=opts["ticklabels"]) - - dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) - ax.set_ylim(self.limits[pname][region]["min"]-dy, - self.limits[pname][region]["max"]+dy) - ylbl = opts["ylabel"] - if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) - ax.set_ylabel(ylbl) - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) - plt.close() - - logger.info("[%s][%s] Success" % (self.longname,m.name)) - - def sitePlots(self,m): - """ - - """ - if not self.hasSites: return - - obs,mod = self.stageData(m) - for i in range(obs.ndata): - 
fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) - tmask = np.where(mod.data.mask[:,i]==False)[0] - if tmask.size > 0: - tmin,tmax = tmask[[0,-1]] - else: - tmin = 0; tmax = mod.time.size-1 - - t = mod.time[tmin:(tmax+1) ] - x = mod.data[tmin:(tmax+1),i] - y = obs.data[tmin:(tmax+1),i] - ax.plot(t,y,'-k',lw=2,alpha=0.5) - ax.plot(t,x,'-',color=m.color) - - ind = np.where(t % 365 < 30.)[0] - ticks = t[ind] - (t[ind] % 365) - ticklabels = (ticks/365.+1850.).astype(int) - ax.set_xticks (ticks ) - ax.set_xticklabels(ticklabels) - ax.set_ylabel(post.UnitStringToMatplotlib(mod.unit)) - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,self.lbls[i],"time"))) - plt.close() - - - def generateHtml(self): - """Generate the HTML for the results of this confrontation. - - This routine opens all netCDF files and builds a table of - metrics. Then it passes the results to the HTML generator and - saves the result in the output directory. This only occurs on - the confrontation flagged as master. - - """ - # only the master processor needs to do this - if not self.master: return - - for page in self.layout.pages: - - # build the metric dictionary - metrics = {} - page.models = [] - for fname in glob.glob(os.path.join(self.output_path,"*.nc")): - with Dataset(fname) as dataset: - mname = dataset.getncattr("name") - if mname != "Benchmark": page.models.append(mname) - if not dataset.groups.has_key(page.name): continue - group = dataset.groups[page.name] - - # if the dataset opens, we need to add the model (table row) - metrics[mname] = {} - - # each model will need to have all regions - for region in self.regions: metrics[mname][region] = {} - - # columns in the table will be in the scalars group - if not group.groups.has_key("scalars"): continue - - # we add scalars to the model/region based on the region - # name being in the variable name. If no region is found, - # we assume it is the global region. 
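# The scalar variables written by the analysis embed the region in their names
# (e.g. "Difference Score global"), which is why the loop below can recover the
# region by substring matching and strip it to obtain the table column name.
# A small self-contained illustration:

vname  = "Difference Score global"
region = "global"                     # first configured region found in vname
name   = vname.replace(region, "")    # -> "Difference Score "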
- grp = group.groups["scalars"] - for vname in grp.variables.keys(): - found = False - for region in self.regions: - if region in vname: - found = True - var = grp.variables[vname] - name = vname.replace(region,"") - metrics[mname][region][name] = Variable(name = name, - unit = var.units, - data = var[...]) - if not found: - var = grp.variables[vname] - metrics[mname]["global"][vname] = Variable(name = vname, - unit = var.units, - data = var[...]) - page.setMetrics(metrics) - - # write the HTML page - f = file(os.path.join(self.output_path,"%s.html" % (self.name)),"w") - f.write(str(self.layout)) - f.close() - - def _relationship(self,m,nbin=25): - """ - - """ - - def _retrieveData(filename): - key = None - with Dataset(filename,mode="r") as dset: - key = [v for v in dset.groups["MeanState"].variables.keys() if "timeint_" in v] - return Variable(filename = filename, - groupname = "MeanState", - variable_name = key[0]) - - def _checkLim(data,lim): - if lim is None: - lim = [min(data.min(),data.min()), - max(data.max(),data.max())] - delta = 1e-8*(lim[1]-lim[0]) - lim[0] -= delta - lim[1] += delta - else: - assert type(lim) == type([]) - assert len (lim) == 2 - return lim - - def _limitExtents(vars): - lim = [+1e20,-1e20] - for v in vars: - lmin,lmax = _checkLim(v.data,None) - lim[0] = min(lmin,lim[0]) - lim[1] = max(lmax,lim[1]) - return lim - - def _buildDistributionResponse(ind,dep,ind_lim=None,dep_lim=None,region=None,nbin=25,eps=3e-3): - - r = Regions() - - # Checks on the input parameters - assert np.allclose(ind.data.shape,dep.data.shape) - ind_lim = _checkLim(ind.data,ind_lim) - dep_lim = _checkLim(dep.data,dep_lim) - - # Mask data - mask = ind.data.mask + dep.data.mask - if region is not None: mask += r.getMask(region,ind) - x = ind.data[mask==False].flatten() - y = dep.data[mask==False].flatten() - - # Compute normalized 2D distribution - dist,xedges,yedges = np.histogram2d(x,y, - bins = [nbin,nbin], - range = [ind_lim,dep_lim]) - dist = np.ma.masked_values(dist.T,0).astype(float) - dist /= dist.sum() - - # Compute the functional response - which_bin = np.digitize(x,xedges).clip(1,xedges.size-1)-1 - mean = np.ma.zeros(xedges.size-1) - std = np.ma.zeros(xedges.size-1) - cnt = np.ma.zeros(xedges.size-1) - np.seterr(under='ignore') - for i in range(mean.size): - yi = y[which_bin==i] - cnt [i] = yi.size - mean[i] = yi.mean() - std [i] = yi.std() - mean = np.ma.masked_array(mean,mask = (cnt/cnt.sum()) < eps) - std = np.ma.masked_array( std,mask = (cnt/cnt.sum()) < eps) - np.seterr(under='warn') - return dist,xedges,yedges,mean,std - - def _scoreDistribution(ref,com): - mask = ref.mask + com.mask - ref = np.ma.masked_array(ref.data,mask=mask).compressed() - com = np.ma.masked_array(com.data,mask=mask).compressed() - return np.sqrt(((np.sqrt(ref)-np.sqrt(com))**2).sum())/np.sqrt(2) - - def _scoreFunction(ref,com): - mask = ref.mask + com.mask - ref = np.ma.masked_array(ref.data,mask=mask).compressed() - com = np.ma.masked_array(com.data,mask=mask).compressed() - return np.exp(-np.linalg.norm(ref-com)/np.linalg.norm(ref)) - - def _plotDistribution(dist,xedges,yedges,xlabel,ylabel,filename): - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - pc = ax.pcolormesh(xedges, yedges, dist, - norm = LogNorm(), - cmap = 'plasma' if plt.cm.cmap_d.has_key('plasma') else 'summer', - vmin = 1e-4, vmax = 1e-1) - div = make_axes_locatable(ax) - fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), - orientation="vertical",label="Fraction of total datasites") - 
ax.set_xlabel(xlabel,fontsize = 12) - ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) - ax.set_xlim(xedges[0],xedges[-1]) - ax.set_ylim(yedges[0],yedges[-1]) - fig.savefig(filename) - plt.close() - - def _plotDifference(ref,com,xedges,yedges,xlabel,ylabel,filename): - ref = np.ma.copy(ref) - com = np.ma.copy(com) - ref.data[np.where(ref.mask)] = 0. - com.data[np.where(com.mask)] = 0. - diff = np.ma.masked_array(com.data-ref.data,mask=ref.mask*com.mask) - lim = np.abs(diff).max() - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - pc = ax.pcolormesh(xedges, yedges, diff, - cmap = 'Spectral_r', - vmin = -lim, vmax = +lim) - div = make_axes_locatable(ax) - fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), - orientation="vertical",label="Distribution Difference") - ax.set_xlabel(xlabel,fontsize = 12) - ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) - ax.set_xlim(xedges[0],xedges[-1]) - ax.set_ylim(yedges[0],yedges[-1]) - fig.savefig(filename) - plt.close() - - def _plotFunction(ref_mean,ref_std,com_mean,com_std,xedges,yedges,xlabel,ylabel,color,filename): - - xe = 0.5*(xedges[:-1]+xedges[1:]) - delta = 0.1*np.diff(xedges).mean() - - # reference function - ref_x = xe - delta - ref_y = ref_mean - ref_e = ref_std - if not (ref_mean.mask==False).all(): - ind = np.where(ref_mean.mask==False) - ref_x = xe [ind]-delta - ref_y = ref_mean[ind] - ref_e = ref_std [ind] - - # comparison function - com_x = xe + delta - com_y = com_mean - com_e = com_std - if not (com_mean.mask==False).all(): - ind = np.where(com_mean.mask==False) - com_x = xe [ind]-delta - com_y = com_mean[ind] - com_e = com_std [ind] - - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - ax.errorbar(ref_x,ref_y,yerr=ref_e,fmt='-o',color='k') - ax.errorbar(com_x,com_y,yerr=com_e,fmt='-o',color=color) - ax.set_xlabel(xlabel,fontsize = 12) - ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) - ax.set_xlim(xedges[0],xedges[-1]) - ax.set_ylim(yedges[0],yedges[-1]) - fig.savefig(filename) - plt.close() - - # If there are no relationships to analyze, get out of here - if self.relationships is None: return - - # Get the HTML page - page = [page for page in self.layout.pages if "Relationships" in page.name] - if len(page) == 0: return - page = page[0] - - # Try to get the dependent data from the model and obs - try: - ref_dep = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) - com_dep = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) - dep_name = self.longname.split("/")[0] - dep_min = self.limits["timeint"]["min"] - dep_max = self.limits["timeint"]["max"] - except: - return - - with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: - - # Grab/create a relationship and scalars group - group = None - if "Relationships" not in results.groups: - group = results.createGroup("Relationships") - else: - group = results.groups["Relationships"] - if "scalars" not in group.groups: - scalars = group.createGroup("scalars") - else: - scalars = group.groups["scalars"] - - # for each relationship... 
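# The two scoring helpers defined above can be exercised on their own:
# _scoreDistribution is the Hellinger distance between two normalized 2-D
# histograms (0 for identical distributions, 1 for fully disjoint ones), and
# _scoreFunction maps the relative error of the binned mean response onto
# (0,1] via exp(-||ref-com||/||ref||). A quick sanity check, assuming the
# helpers are in scope and the inputs are masked arrays:
#
#     p = np.ma.array([[1.0, 0.0], [0.0, 0.0]])
#     q = np.ma.array([[0.0, 1.0], [0.0, 0.0]])
#     _scoreDistribution(p, p)                                      # -> 0.0
#     _scoreDistribution(p, q)                                      # -> 1.0
#     _scoreFunction(np.ma.array([1., 2.]), np.ma.array([1., 2.]))  # -> 1.0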
- for c in self.relationships: - - # try to get the independent data from the model and obs - try: - ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) - com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) - ind_name = c.longname.split("/")[0] - ind_min = c.limits["timeint"]["min"]-1e-12 - ind_max = c.limits["timeint"]["max"]+1e-12 - except: - continue - - # Add figures to the html page - page.addFigure(c.longname, - "benchmark_rel_%s" % ind_name, - "Benchmark_RNAME_rel_%s.png" % ind_name, - legend = False, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, - page.addFigure(c.longname, - "rel_%s" % ind_name, - "MNAME_RNAME_rel_%s.png" % ind_name, - legend = False, - benchmark = False) - page.addFigure(c.longname, - "rel_%s" % ind_name, - "MNAME_RNAME_rel_%s.png" % ind_name, - legend = False, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - page.addFigure(c.longname, - "rel_diff_%s" % ind_name, - "MNAME_RNAME_rel_diff_%s.png" % ind_name, - legend = False, - benchmark = False) - page.addFigure(c.longname, - "rel_func_%s" % ind_name, - "MNAME_RNAME_rel_func_%s.png" % ind_name, - legend = False, - benchmark = False) - - # Analysis over regions - lim_dep = [dep_min,dep_max] - lim_ind = [ind_min,ind_max] - longname = c.longname.split('/')[0] - for region in self.regions: - ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) - com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) - - # Make the plots - _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], - "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), - "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) - _plotDistribution(com_dist[0],com_dist[1],com_dist[2], - "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) - _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], - "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) - _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], - "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), - m.color, - os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) - - # Score the distribution - score = _scoreDistribution(ref_dist[0],com_dist[0]) - sname = "%s Hellinger Distance %s" % (longname,region) - if sname in scalars.variables: - scalars.variables[sname][0] = score - else: - Variable(name = sname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - # Score the functional response - score = _scoreFunction(ref_dist[3],com_dist[3]) - sname = "%s RMSE Score %s" % (longname,region) - if sname in scalars.variables: - scalars.variables[sname][0] = score - else: - Variable(name = sname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - - page.addFigure(c.longname, - "rel_func_%s" % ind_name, - 
"MNAME_RNAME_rel_func_%s.png" % ind_name, - legend = False, - benchmark = False) - - # Analysis over regions - lim_dep = [dep_min,dep_max] - lim_ind = [ind_min,ind_max] - longname = c.longname.split('/')[0] - for region in self.regions: - ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) - com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) - - # Make the plots - _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], - "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), - "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) - _plotDistribution(com_dist[0],com_dist[1],com_dist[2], - "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) - _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], - "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), - os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) - _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], - "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), - "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), - m.color, - os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) - - # Score the distribution - score = _scoreDistribution(ref_dist[0],com_dist[0]) - sname = "%s Hellinger Distance %s" % (longname,region) - if sname in scalars.variables: - scalars.variables[sname][0] = score - else: - Variable(name = sname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - # Score the functional response - score = _scoreFunction(ref_dist[3],com_dist[3]) - sname = "%s RMSE Score %s" % (longname,region) - if sname in scalars.variables: - scalars.variables[sname][0] = score - else: - Variable(name = sname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - # score the relationship - i0,i1 = np.where(np.abs(obs_x[:,np.newaxis]-mod_x)<1e-12) - obs_y = obs_y[i0]; mod_y = mod_y[i1] - isnan = np.isnan(obs_y)*np.isnan(mod_y) - obs_y[isnan] = 0.; mod_y[isnan] = 0. 
- score = np.exp(-np.linalg.norm(obs_y-mod_y)/np.linalg.norm(obs_y)) - vname = '%s RMSE Score %s' % (c.longname.split('/')[0],region) - if vname in scalars.variables: - scalars.variables[vname][0] = score - else: - Variable(name = vname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - # score the relationship - i0,i1 = np.where(np.abs(obs_x[:,np.newaxis]-mod_x)<1e-12) - score = np.exp(-np.linalg.norm(obs_y[i0]-mod_y[i1])/np.linalg.norm(obs_y[i0])) - vname = '%s RMSE Score %s' % (c.longname.split('/')[0],region) - if vname in scalars.variables: - scalars.variables[vname][0] = score - else: - Variable(name = vname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - -class FileContextManager(): - - def __init__(self,master,mod_results,obs_results): - - self.master = master - self.mod_results = mod_results - self.obs_results = obs_results - self.mod_dset = None - self.obs_dset = None - - def __enter__(self): - - # Open the file on entering, both if you are the master - self.mod_dset = Dataset(self.mod_results,mode="w") - if self.master: self.obs_dset = Dataset(self.obs_results,mode="w") - return self - - def __exit__(self, exc_type, exc_value, traceback): - - # Always close the file(s) on exit - self.mod_dset.close() - if self.master: self.obs_dset.close() - - # If an exception occurred, also remove the files - if exc_type is not None: - os.system("rm -f %s" % self.mod_results) - - diff --git a/ilamb/ilamb/src/ILAMB/ModelResult.py b/ilamb/ilamb/src/ILAMB/ModelResult.py deleted file mode 100644 index 908d7371..00000000 --- a/ilamb/ilamb/src/ILAMB/ModelResult.py +++ /dev/null @@ -1,344 +0,0 @@ -from Variable import Variable -from netCDF4 import Dataset -import ilamblib as il -import numpy as np -import glob,os -from mpi4py import MPI -import logging - -logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) - -class ModelResult(): - """A class for exploring model results. - - This class provides a simplified way of accessing model - results. It is essentially a pointer to a top level directory and - defines the model as all netCDF4 files found in its - subdirectories. If this directory contains model output from - several runs or experiments, you may wish to specify a string (the - *filter* argument) which we will require to be in the filename for - it to be considered part of the model. - - Parameters - ---------- - path : str - the full path to the directory which contains the model result - files - modelname : str, optional - a string representing the name of the model, will be used as a - label in plot legends - color : 3-tuple, optional - a normalized tuple representing a color in RGB color space, - will be used to color line plots - filter : str, optional - this string must be in file's name for it to be considered as - part of the model results - model_year : 2-tuple of int, optional - used to shift model times, all model years at model_year[0] - are shifted to model_year[1] - """ - def __init__(self,path,modelname="unamed",color=(0,0,0),filter="",model_year=None): - self.path = path - self.color = color - self.filter = filter - self.shift = 0. - if model_year is not None: self.shift = (model_year[1]-model_year[0])*365. 
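# A minimal usage sketch of the constructor documented above; the path, model
# name, filter string, and color here are hypothetical. model_year=[1,1850]
# shifts a simulation whose own calendar starts at year 1 so that it lines up
# with the 1850-based observational records:
#
#     m = ModelResult("/path/to/model/output",
#                     modelname  = "MYMODEL",
#                     color      = (0.2, 0.4, 0.8),
#                     filter     = "r1i1p1",
#                     model_year = [1, 1850])   # shift = (1850-1)*365 days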
- self.name = modelname - self.confrontations = {} - self.cell_areas = None - self.land_fraction = None - self.land_areas = None - self.land_area = None - self.lat = None - self.lon = None - self.lat_bnds = None - self.lon_bnds = None - self.variables = None - self.extents = np.asarray([[-90.,+90.],[-180.,+180.]]) - self._findVariables() - self._getGridInformation() - - def _findVariables(self): - """Loops through the netCDF4 files in a model's path and builds a dictionary of which variables are in which files. - """ - def _get(key,dset): - dim_name = key - try: - v = dset.variables[key] - dim_bnd_name = v.getncattr("bounds") - except: - dim_bnd_name = None - return dim_name,dim_bnd_name - - variables = {} - for subdir, dirs, files in os.walk(self.path): - for fileName in files: - if ".nc" not in fileName: continue - if self.filter not in fileName: continue - pathName = os.path.join(subdir,fileName) - dataset = Dataset(pathName) - - # populate dictionary for which variables are in which files - for key in dataset.variables.keys(): - if not variables.has_key(key): - variables[key] = [] - variables[key].append(pathName) - - # determine spatial extents - lats = [key for key in variables.keys() if (key.lower().startswith("lat" ) or - key.lower(). endswith("lat" ))] - lons = [key for key in variables.keys() if (key.lower().startswith("lon" ) or - key.lower(). endswith("lon" ) or - key.lower().startswith("long") or - key.lower(). endswith("long"))] - for key in lats: - for pathName in variables[key]: - with Dataset(pathName) as dset: - lat = dset.variables[key][...] - if lat.size == 1: continue - self.extents[0,0] = max(self.extents[0,0],lat.min()) - self.extents[0,1] = min(self.extents[0,1],lat.max()) - for key in lons: - for pathName in variables[key]: - with Dataset(pathName) as dset: - lon = dset.variables[key][...] - if lon.size == 1: continue - if lon.ndim < 1 or lon.ndim > 2: continue - lon = (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360 - self.extents[1,0] = max(self.extents[1,0],lon.min()) - self.extents[1,1] = min(self.extents[1,1],lon.max()) - - # fix extents - eps = 5. - if self.extents[0,0] < (- 90.+eps): self.extents[0,0] = - 90. - if self.extents[0,1] > (+ 90.-eps): self.extents[0,1] = + 90. - if self.extents[1,0] < (-180.+eps): self.extents[1,0] = -180. - if self.extents[1,1] > (+180.-eps): self.extents[1,1] = +180. - self.variables = variables - - def _getGridInformation(self): - """Looks in the model output for cell areas as well as land fractions. - """ - # Are there cell areas associated with this model? - if "areacella" not in self.variables.keys(): return - f = Dataset(self.variables["areacella"][0]) - self.cell_areas = f.variables["areacella"][...] - self.lat = f.variables["lat"][...] - self.lon = f.variables["lon"][...] - self.lat_bnds = np.zeros(self.lat.size+1) - self.lat_bnds[:-1] = f.variables["lat_bnds"][:,0] - self.lat_bnds[-1] = f.variables["lat_bnds"][-1,1] - self.lon_bnds = np.zeros(self.lon.size+1) - self.lon_bnds[:-1] = f.variables["lon_bnds"][:,0] - self.lon_bnds[-1] = f.variables["lon_bnds"][-1,1] - - # Now we do the same for land fractions - if "sftlf" not in self.variables.keys(): - self.land_areas = self.cell_areas - else: - self.land_fraction = (Dataset(self.variables["sftlf"][0]).variables["sftlf"])[...] 
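# The longitude wrap used in _findVariables above maps either longitude
# convention onto [-180,180] in one vectorized expression; a quick check of
# the arithmetic with illustrative values:

import numpy as np

lon = np.array([90., 190., 350., -190.])
lon = (lon <= 180)*lon + (lon > 180)*(lon - 360) + (lon < -180)*360
# -> array([  90., -170.,  -10.,  170.])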
- # some models represent the fraction as a percent - if np.ma.max(self.land_fraction) > 1: self.land_fraction *= 0.01 - self.land_areas = self.cell_areas*self.land_fraction - self.land_area = np.ma.sum(self.land_areas) - return - - def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time=-1e20,final_time=1e20,output_unit="",expression=None): - """Extracts a time series of the given variable from the model. - - Parameters - ---------- - variable : str - name of the variable to extract - alt_vars : list of str, optional - alternate variables to search for if *variable* is not found - initial_time : float, optional - include model results occurring after this time - final_time : float, optional - include model results occurring before this time - output_unit : str, optional - if specified, will try to convert the units of the variable - extract to these units given. - lats : numpy.ndarray, optional - a 1D array of latitude locations at which to extract information - lons : numpy.ndarray, optional - a 1D array of longitude locations at which to extract information - expression : str, optional - an algebraic expression describing how to combine model outputs - - Returns - ------- - var : ILAMB.Variable.Variable - the extracted variable - - """ - # prepend the target variable to the list of possible variables - altvars = list(alt_vars) - altvars.insert(0,variable) - - # checks on input consistency - if lats is not None: assert lons is not None - if lons is not None: assert lats is not None - if lats is not None: assert lats.shape == lons.shape - - # create a list of datafiles which have a non-null intersection - # over the desired time range - V = [] - tmin = 1e20 - tmax = -1e20 - for v in altvars: - if not self.variables.has_key(v): continue - for pathName in self.variables[v]: - var = Variable(filename = pathName, - variable_name = variable, - alternate_vars = altvars[1:], - area = self.land_areas, - t0 = initial_time - self.shift, - tf = final_time - self.shift) - tmin = min(tmin,var.time_bnds.min()) - tmax = max(tmax,var.time_bnds.max()) - if ((var.time_bnds.max() < initial_time - self.shift) or - (var.time_bnds.min() > final_time - self.shift)): continue - if lats is not None and var.ndata: - r = np.sqrt((lats[:,np.newaxis]-var.lat)**2 + - (lons[:,np.newaxis]-var.lon)**2) - imin = r.argmin(axis=1) - rmin = r. 
min(axis=1) - imin = imin[np.where(rmin<1.0)] - if imin.size == 0: - logger.debug("[%s] Could not find [%s] at the input sites in the model results" % (self.name,",".join(altvars))) - raise il.VarNotInModel() - var.lat = var.lat [ imin] - var.lon = var.lon [ imin] - var.data = var.data[:,imin] - var.ndata = var.data.shape[1] - if lats is not None and var.spatial: var = var.extractDatasites(lats,lons) - var.time += self.shift - var.time_bnds += self.shift - V.append(var) - if len(V) > 0: break - - # If we didn't find any files, try to put together the - # variable from a given expression - if len(V) == 0: - if expression is not None: - v = self.derivedVariable(variable, - expression, - lats = lats, - lons = lons, - initial_time = initial_time, - final_time = final_time) - else: - tstr = "" - if tmin < tmax: tstr = " in the given time frame, tinput = [%.1f,%.1f], tmodel = [%.1f,%.1f]" % (initial_time,final_time,tmin+self.shift,tmax+self.shift) - logger.debug("[%s] Could not find [%s] in the model results%s" % (self.name,",".join(altvars),tstr)) - raise il.VarNotInModel() - else: - v = il.CombineVariables(V) - - - return v - - def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_time=-1e20,final_time=1e20): - """Creates a variable from an algebraic expression of variables in the model results. - - Parameters - ---------- - variable_name : str - name of the variable to create - expression : str - an algebraic expression describing how to combine model outputs - initial_time : float, optional - include model results occurring after this time - final_time : float, optional - include model results occurring before this time - lats : numpy.ndarray, optional - a 1D array of latitude locations at which to extract information - lons : numpy.ndarray, optional - a 1D array of longitude locations at which to extract information - - Returns - ------- - var : ILAMB.Variable.Variable - the new variable - - """ - from sympy import sympify - if expression is None: raise il.VarNotInModel() - args = {} - units = {} - unit = expression - mask = None - time = None - tbnd = None - lat = None - lon = None - ndata = None - area = None - depth = None - dbnds = None - - for arg in sympify(expression).free_symbols: - - var = self.extractTimeSeries(arg.name, - lats = lats, - lons = lons, - initial_time = initial_time, - final_time = final_time) - units[arg.name] = var.unit - args [arg.name] = var.data.data - - if mask is None: - mask = var.data.mask - else: - mask += var.data.mask - if time is None: - time = var.time - else: - assert(np.allclose(time,var.time)) - if tbnd is None: - tbnd = var.time_bnds - else: - assert(np.allclose(tbnd,var.time_bnds)) - if lat is None: - lat = var.lat - else: - assert(np.allclose(lat,var.lat)) - if lon is None: - lon = var.lon - else: - assert(np.allclose(lon,var.lon)) - if area is None: - area = var.area - else: - assert(np.allclose(area,var.area)) - if ndata is None: - ndata = var.ndata - else: - assert(np.allclose(ndata,var.ndata)) - if depth is None: - depth = var.depth - else: - assert(np.allclose(depth,var.depth)) - if dbnds is None: - dbnds = var.depth_bnds - else: - assert(np.allclose(dbnds,var.depth_bnds)) - - np.seterr(divide='ignore',invalid='ignore') - result,unit = il.SympifyWithArgsUnits(expression,args,units) - np.seterr(divide='raise',invalid='raise') - mask += np.isnan(result) - result = np.ma.masked_array(np.nan_to_num(result),mask=mask) - - return Variable(data = np.ma.masked_array(result,mask=mask), - unit = unit, - name = variable_name, - 
time = time, - time_bnds = tbnd, - lat = lat, - lon = lon, - area = area, - ndata = ndata, - depth = depth, - depth_bnds = dbnds) diff --git a/ilamb/ilamb/src/ILAMB/Post.py b/ilamb/ilamb/src/ILAMB/Post.py deleted file mode 100644 index 07366eb0..00000000 --- a/ilamb/ilamb/src/ILAMB/Post.py +++ /dev/null @@ -1,1048 +0,0 @@ -import pylab as plt -import numpy as np -from constants import space_opts,time_opts -from Regions import Regions -import re - -def UseLatexPltOptions(fsize=18): - params = {'axes.titlesize':fsize, - 'axes.labelsize':fsize, - 'font.size':fsize, - 'legend.fontsize':fsize, - 'xtick.labelsize':fsize, - 'ytick.labelsize':fsize} - plt.rcParams.update(params) - -def UnitStringToMatplotlib(unit,add_carbon=False): - # replace 1e-6 with micro - match = re.findall("(1e-6\s)",unit) - for m in match: unit = unit.replace(m,"$\mu$") - # raise exponents using Latex - match = re.findall("(-\d)",unit) - for m in match: unit = unit.replace(m,"$^{%s}$" % m) - # add carbon symbol to all mass units - if add_carbon: - match = re.findall("(\D*g)",unit) - for m in match: unit = unit.replace(m,"%s C " % m) - return unit - -def ColorBar(ax,**keywords): - """Plot a colorbar. - - We plot colorbars separately so they can be rendered once and used - for multiple plots. - - Parameters - ---------- - ax : matplotlib.axes._subplots.AxesSubplot - the matplotlib axes object onto which you wish to plot the variable - vmin : float, optional - the minimum plotted value - vmax : float, optional - the maximum plotted value - cmap : str, optional - the name of the colormap to be used in plotting the spatial variable - label : str, optional - the text which appears with the colorbar - - """ - from matplotlib import colorbar,colors - vmin = keywords.get("vmin",None) - vmax = keywords.get("vmax",None) - cmap = keywords.get("cmap","jet") - ticks = keywords.get("ticks",None) - ticklabels = keywords.get("ticklabels",None) - label = keywords.get("label",None) - cb = colorbar.ColorbarBase(ax,cmap=cmap, - norm=colors.Normalize(vmin=vmin,vmax=vmax), - orientation='horizontal') - cb.set_label(label) - if ticks is not None: cb.set_ticks(ticks) - if ticklabels is not None: cb.set_ticklabels(ticklabels) - -def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True): - """Plot a Taylor diagram. - - This is adapted from the code by Yannick Copin found here: - - https://gist.github.com/ycopin/3342888 - - Parameters - ---------- - stddev : numpy.ndarray - an array of standard deviations - corrcoeff : numpy.ndarray - an array of correlation coefficients - refstd : float - the reference standard deviation - fig : matplotlib figure - the matplotlib figure - colors : array - an array of colors for each element of the input arrays - normalize : bool, optional - disable to skip normalization of the standard deviation - - """ - from matplotlib.projections import PolarAxes - import mpl_toolkits.axisartist.floating_axes as FA - import mpl_toolkits.axisartist.grid_finder as GF - - # define transform - tr = PolarAxes.PolarTransform() - - # correlation labels - rlocs = np.concatenate((np.arange(10)/10.,[0.95,0.99])) - tlocs = np.arccos(rlocs) - gl1 = GF.FixedLocator(tlocs) - tf1 = GF.DictFormatter(dict(zip(tlocs,map(str,rlocs)))) - - # standard deviation axis extent - if normalize: - stddev = stddev/refstd - refstd = 1. 
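# At this point the model standard deviations have been normalized by the
# reference, so the benchmark always sits at unit radius on the diagram. A
# minimal call sketch consistent with the signature above (values illustrative):
#
#     fig = plt.figure(figsize=(6.0, 6.0))
#     TaylorDiagram(np.asarray([0.8, 1.3]),    # model standard deviations
#                   np.asarray([0.95, 0.70]),  # correlations with the benchmark
#                   1.0,                       # reference standard deviation
#                   fig,
#                   ['b', 'r'])                # one color per model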
- smin = 0 - smax = max(2.0,1.1*stddev.max()) - - # add the curvilinear grid - ghelper = FA.GridHelperCurveLinear(tr, - extremes=(0,np.pi/2,smin,smax), - grid_locator1=gl1, - tick_formatter1=tf1) - ax = FA.FloatingSubplot(fig, 111, grid_helper=ghelper) - fig.add_subplot(ax) - - # adjust axes - ax.axis["top"].set_axis_direction("bottom") - ax.axis["top"].toggle(ticklabels=True,label=True) - ax.axis["top"].major_ticklabels.set_axis_direction("top") - ax.axis["top"].label.set_axis_direction("top") - ax.axis["top"].label.set_text("Correlation") - ax.axis["left"].set_axis_direction("bottom") - if normalize: - ax.axis["left"].label.set_text("Normalized standard deviation") - else: - ax.axis["left"].label.set_text("Standard deviation") - ax.axis["right"].set_axis_direction("top") - ax.axis["right"].toggle(ticklabels=True) - ax.axis["right"].major_ticklabels.set_axis_direction("left") - ax.axis["bottom"].set_visible(False) - ax.grid(True) - - ax = ax.get_aux_axes(tr) - # Plot data - corrcoef = corrcoef.clip(-1,1) - for i in range(len(corrcoef)): - ax.plot(np.arccos(corrcoef[i]),stddev[i],'o',color=colors[i],mew=0,ms=8) - - # Add reference point and stddev contour - l, = ax.plot([0],refstd,'k*',ms=12,mew=0) - t = np.linspace(0, np.pi/2) - r = np.zeros_like(t) + refstd - ax.plot(t,r, 'k--') - - # centralized rms contours - rs,ts = np.meshgrid(np.linspace(smin,smax), - np.linspace(0,np.pi/2)) - rms = np.sqrt(refstd**2 + rs**2 - 2*refstd*rs*np.cos(ts)) - contours = ax.contour(ts,rs,rms,5,colors='k',alpha=0.4) - ax.clabel(contours,fmt='%1.1f') - - - return ax - -class HtmlFigure(): - - def __init__(self,name,pattern,side=None,legend=False,benchmark=False,longname=None): - - self.name = name - self.pattern = pattern - self.side = side - self.legend = legend - self.benchmark = benchmark - self.longname = longname - - def generateClickRow(self,allModels=False): - name = self.pattern - if allModels: name = name.replace(self.name,"PNAME") - for token in ['CNAME','MNAME','RNAME','PNAME']: - name = name.split(token) - name = ("' + %s + '" % token).join(name) - name = "'%s'" % name - name = name.replace("'' + ","") - code = """ - document.getElementById('%s').src = %s""" % (self.name,name) - if self.benchmark: - name = self.pattern.replace('MNAME','Benchmark') - for token in ['CNAME','MNAME','RNAME']: - name = name.split(token) - name = ("' + %s + '" % token).join(name) - name = "'%s'" % name - name = name.replace("'' + ","") - code += """ - document.getElementById('benchmark_%s').src = %s""" % (self.name,name) - return code - - def __str__(self): - - code = """ -
    -
    """ % (self.name) - if self.side is not None: - code += """ -
    %s
    """ % (self.side.replace(" "," ")) - code += """ - Data not available""" % (self.name) - if self.legend: - code += """ -
    Data not available
    """ % (self.name.replace("benchmark_","")) - code += """ -
    -
    """ - return code - - -class HtmlPage(object): - - def __init__(self,name,title): - self.name = name - self.title = title - self.cname = "" - self.pages = [] - self.metric_dict = None - self.models = None - self.regions = None - self.metrics = None - self.units = None - self.priority = ["Bias","RMSE","Phase","Seasonal","Spatial","Interannual","Score","Overall"] - self.header = "CNAME" - self.sections = [] - self.figures = {} - self.text = None - - def __str__(self): - - r = Regions() - def _sortFigures(figure): - macro = ["timeint","bias","rmse","iav","phase","shift","variance","spaceint","accumulate","cycle"] - val = 1. - for i,m in enumerate(macro): - if m in figure.name: val += 3**i - if figure.name.startswith("benchmark"): val -= 1. - if figure.name.endswith("score"): val += 1. - if figure.name.startswith("legend"): - if "variance" in figure.name: - val += 1. - else: - val = 0. - return val - - code = """ -
    -
    -

    %s

    """ % (self.name,self.name,self.title) - if self.pages: - code += """ -
    -
      """ - for page in self.pages: - opts = "" - if page == self: opts = " class=ui-btn-active ui-state-persist" - code += """ -
    • %s
    • """ % (page.name,opts,page.title) - code += """ -
    """ - code += """ -
    -
    """ - - if self.regions: - code += """ - """ - - if self.metric_dict: - code += """ -
    """ % self.name - - if self.text is not None: - code += """ - %s""" % self.text - - for section in self.sections: - if len(self.figures[section]) == 0: continue - self.figures[section].sort(key=_sortFigures) - code += """ -

    %s

    """ % section - for figure in self.figures[section]: - if figure.name == "spatial_variance": code += "
    " - code += "%s" % (figure) - code += """ -
    """ - - code += """ -
    """ - return code - - def setHeader(self,header): - self.header = header - - def setSections(self,sections): - - assert type(sections) == type([]) - self.sections = sections - for section in sections: self.figures[section] = [] - - def addFigure(self,section,name,pattern,side=None,legend=False,benchmark=False,longname=None): - - assert section in self.sections - for fig in self.figures[section]: - if fig.name == name: return - self.figures[section].append(HtmlFigure(name,pattern,side=side,legend=legend,benchmark=benchmark,longname=longname)) - - def setMetricPriority(self,priority): - self.priority = priority - - def googleScript(self): - if not self.metric_dict: return "" - models = self.models - regions = self.regions - metrics = self.metrics - units = self.units - cname = self.cname.split(" / ") - if len(cname) == 3: - cname = cname[1].strip() - else: - cname = cname[-1].strip() - callback = "%sTable()" % self.name - head = """ - function %sTable() { - var data = new google.visualization.DataTable(); - data.addColumn('string','Model'); - data.addColumn('string','Data');""" % (self.name) - for region in regions: - for metric in metrics: - head += """ - data.addColumn('number','%s [%s]');""" % (metric,metric,units[metric]) - head += """ - data.addRows([""" - for model in models: - head += """ - ['%s','[-]'""" % (model,cname,model) - for region in regions: - for metric in metrics: - add = ", null" - try: - add = ",%.03f" % self.metric_dict[model][region][metric].data - add = add.lower().replace("nan","null") - except: - pass - head += add - head += "]," - head += """ - ]);""" - - n = len(metrics) - cols = (str(range(2,n+2))[1:]).replace(", ",", %d*rid+" % n) - cols = "%d*rid+2" % n + cols[1:] - head += """ - var view = new google.visualization.DataView(data); - var rid = document.getElementById("%sRegion").selectedIndex - view.setColumns([0,1,%s);""" % (self.name,cols) - - head += """ - var table = new google.visualization.Table(document.getElementById('%s_table')); - table.draw(view, {showRowNumber: false,allowHtml: true});""" % self.name - - head += """ - function clickRow() { - var header = "%s"; - var CNAME = "%s"; - header = header.replace("CNAME",CNAME); - var rid = document.getElementById("%s").selectedIndex; - var RNAME = document.getElementById("%s").options[rid].value; - header = header.replace("RNAME",RNAME); - var select = table.getSelection() - row = select[0].row;""" % (self.header,self.cname,self.name+"Region",self.name+"Region") - if "Benchmark" in models: - head += """ - if (row == 0) { - table.setSelection([{'row': 1}]); - clickRow(); - return; - }""" - head += """ - var MNAME = data.getValue(row,0); - header = header.replace("MNAME",MNAME); - $("#%sHead").text(header);""" % (self.name) - for section in self.sections: - for figure in self.figures[section]: - head += figure.generateClickRow() - head += """ - } - google.visualization.events.addListener(table, 'select', clickRow); - table.setSelection([{'row': 0}]); - clickRow(); - }""" - return head,callback,"table" - - def setRegions(self,regions): - assert type(regions) == type([]) - self.regions = regions - - def setMetrics(self,metric_dict): - - # Sorting function - def _sortMetrics(name,priority=self.priority): - val = 1. 
- for i,pname in enumerate(priority): - if pname in name: val += 2**i - return val - - assert type(metric_dict) == type({}) - self.metric_dict = metric_dict - - # Build and sort models, regions, and metrics - models = self.metric_dict.keys() - regions = [] - metrics = [] - units = {} - for model in models: - for region in self.metric_dict[model].keys(): - if region not in regions: regions.append(region) - for metric in self.metric_dict[model][region].keys(): - units[metric] = self.metric_dict[model][region][metric].unit - if metric not in metrics: metrics.append(metric) - models.sort(key=lambda key: key.lower()) - if "Benchmark" in models: models.insert(0,models.pop(models.index("Benchmark"))) - regions.sort() - metrics.sort(key=_sortMetrics) - self.models = models - if self.regions is None: self.regions = regions - self.metrics = metrics - self.units = units - - def head(self): - return "" - -class HtmlAllModelsPage(HtmlPage): - - def __init__(self,name,title): - - super(HtmlAllModelsPage,self).__init__(name,title) - self.plots = None - self.nobench = None - self.nolegend = [] - - def _populatePlots(self): - - self.plots = [] - bench = [] - for page in self.pages: - if page.sections is not None: - for section in page.sections: - if len(page.figures[section]) == 0: continue - for figure in page.figures[section]: - if (figure.name in ["spatial_variance","compcycle","profile", - "legend_spatial_variance","legend_compcycle"]): continue # ignores - if "benchmark" in figure.name: - if figure.name not in bench: bench.append(figure.name) - continue - if figure not in self.plots: self.plots.append(figure) - if not figure.legend: self.nolegend.append(figure.name) - self.nobench = [plot.name for plot in self.plots if "benchmark_%s" % (plot.name) not in bench] - - def __str__(self): - - if self.plots is None: self._populatePlots() - r = Regions() - - code = """ -
    %s
    """ % (self.name,self.name,self.title) - if self.pages: - code += """ -
    -
      """ - for page in self.pages: - opts = "" - if page == self: opts = " class=ui-btn-active ui-state-persist" - code += """ -
    • %s
    • """ % (page.name,opts,page.title) - code += """ -
    """ - code += """ -
    -
    """ - - if self.regions: - code += """ - """ - - if self.plots: - code += """ - """ - - fig = self.plots[0] - rem_side = fig.side - fig.side = "MNAME" - img = "%s" % (fig) - img = img.replace('"leg"','"MNAME_legend"').replace("%s" % fig.name,"MNAME") - fig.side = rem_side - for model in self.pages[0].models: - code += img.replace("MNAME",model) - - if self.text is not None: - code += """ - %s""" % self.text - - code += """ -
    """ - return code - - def googleScript(self): - return "","","" - - def head(self): - - if self.plots is None: self._populatePlots() - - models = self.pages[0].models - regions = self.regions - try: - regions.sort() - except: - pass - head = """ - - """ - return head - -class HtmlSitePlotsPage(HtmlPage): - - def __init__(self,name,title): - - super(HtmlSitePlotsPage,self).__init__(name,title) - - def __str__(self): - - # setup page navigation - code = """ -
    %s
    """ % (self.name,self.name,self.title) - if self.pages: - code += """ -
    -
      """ - for page in self.pages: - opts = "" - if page == self: opts = " class=ui-btn-active ui-state-persist" - code += """ -
    • %s
    • """ % (page.name,opts,page.title) - code += """ -
    """ - code += """ -
    -
    """ - - code += """ - """ - - code += """ - """ - - code += """ -
    Data not available
    """ - - code += """ -
    """ - - return code - - def setMetrics(self,metric_dict): - self.models.sort() - - def googleScript(self): - - callback = "%sMap()" % (self.name) - head = """ - function %sMap() { - var sitedata = google.visualization.arrayToDataTable( - [['Latitude', 'Longitude', '%s [%s]'],\n""" % (self.name,self.vname,self.unit) - - for lat,lon,val in zip(self.lat,self.lon,self.vals): - if val is np.ma.masked: - sval = "null" - else: - sval = "%.2f" % val - head += " [%.3f,%.3f,%s],\n" % (lat,lon,sval) - head = head[:-2] + "]);\n" - head += (" var names = %s;" % (self.sites)).replace("u'","'").replace(", '",",'") - head += """ - var options = { - dataMode: 'markers', - magnifyingGlass: {enable: true, zoomFactor: 3.}, - }; - var container = document.getElementById('map_canvas'); - var geomap = new google.visualization.GeoChart(container); - function updateMap() { - var mid = document.getElementById("%sModel").selectedIndex; - var MNAME = document.getElementById("%sModel").options[mid].value; - var rid = document.getElementById("%sSite" ).selectedIndex; - var RNAME = document.getElementById("%sSite" ).options[rid].value; - document.getElementById('time').src = MNAME + '_' + RNAME + '_time.png'; - } - function clickMap() { - var select = geomap.getSelection(); - if (Object.keys(select).length == 1) { - var site = $("select#SitePlotsSite"); - site[0].selectedIndex = select[0].row; - site.selectmenu('refresh'); - } - updateMap(); - } - google.visualization.events.addListener(geomap,'select',clickMap); - geomap.draw(sitedata, options); - updateMap(); - };""" % (self.name,self.name,self.name,self.name) - - return head,callback,"geomap" - - def head(self): - return "" - -class HtmlLayout(): - - def __init__(self,pages,cname,years=None): - - self.pages = pages - self.cname = cname.replace("/"," / ") - if years is not None: - try: - self.cname += " / %d-%d" % (years) - except: - pass - for page in self.pages: - page.pages = self.pages - page.cname = self.cname - - def __str__(self): - code = """ - """ - - code += """ - - - - - """ - - ### stick in javascript stuff here - functions = [] - callbacks = [] - packages = [] - for page in self.pages: - out = page.googleScript() - if len(out) == 3: - f,c,p = out - if f != "": functions.append(f) - if c != "": callbacks.append(c) - if p != "": packages.append(p) - - code += """ - """ - - for page in self.pages: code += page.head() - - ### stick in css stuff here - code += """ - """ - - code += """ - - """ - - ### loop over pages - for page in self.pages: code += "%s" % (page) - - code += """ - -""" - return code - -def RegisterCustomColormaps(): - """Adds the 'stoplight' and 'RdGn' colormaps to matplotlib's database - - """ - import colorsys as cs - - # stoplight colormap - Rd1 = [1.,0.,0.]; Rd2 = Rd1 - Yl1 = [1.,1.,0.]; Yl2 = Yl1 - Gn1 = [0.,1.,0.]; Gn2 = Gn1 - val = 0.65 - Rd1 = cs.rgb_to_hsv(Rd1[0],Rd1[1],Rd1[2]) - Rd1 = cs.hsv_to_rgb(Rd1[0],Rd1[1],val ) - Yl1 = cs.rgb_to_hsv(Yl1[0],Yl1[1],Yl1[2]) - Yl1 = cs.hsv_to_rgb(Yl1[0],Yl1[1],val ) - Gn1 = cs.rgb_to_hsv(Gn1[0],Gn1[1],Gn1[2]) - Gn1 = cs.hsv_to_rgb(Gn1[0],Gn1[1],val ) - p = 0 - level1 = 0.5 - level2 = 0.75 - RdYlGn = {'red': ((0.0 , 0.0 ,Rd1[0]), - (level1-p, Rd2[0],Rd2[0]), - (level1+p, Yl1[0],Yl1[0]), - (level2-p, Yl2[0],Yl2[0]), - (level2+p, Gn1[0],Gn1[0]), - (1.00 , Gn2[0], 0.0)), - - 'green': ((0.0 , 0.0 ,Rd1[1]), - (level1-p, Rd2[1],Rd2[1]), - (level1+p, Yl1[1],Yl1[1]), - (level2-p, Yl2[1],Yl2[1]), - (level2+p, Gn1[1],Gn1[1]), - (1.00 , Gn2[1], 0.0)), - - 'blue': ((0.0 , 0.0 ,Rd1[2]), - (level1-p, 
Rd2[2],Rd2[2]), - (level1+p, Yl1[2],Yl1[2]), - (level2-p, Yl2[2],Yl2[2]), - (level2+p, Gn1[2],Gn1[2]), - (1.00 , Gn2[2], 0.0))} - plt.register_cmap(name='stoplight', data=RdYlGn) - - # RdGn colormap - val = 0.8 - Rd = cs.rgb_to_hsv(1,0,0) - Rd = cs.hsv_to_rgb(Rd[0],Rd[1],val) - Gn = cs.rgb_to_hsv(0,1,0) - Gn = cs.hsv_to_rgb(Gn[0],Gn[1],val) - RdGn = {'red': ((0.0, 0.0, Rd[0]), - (0.5, 1.0 , 1.0 ), - (1.0, Gn[0], 0.0 )), - 'green': ((0.0, 0.0, Rd[1]), - (0.5, 1.0, 1.0 ), - (1.0, Gn[1], 0.0 )), - 'blue': ((0.0, 0.0, Rd[2]), - (0.5, 1.0, 1.0 ), - (1.0, Gn[2], 0.0 ))} - plt.register_cmap(name='RdGn', data=RdGn) - - -def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=False): - """Creates a summary figure for the benchmark results contained in the - data array. - - Parameters - ---------- - models : list - a list of the model names - variables : list - a list of the variable names - data : numpy.ndarray or numpy.ma.ndarray - data scores whose shape is ( len(variables), len(models) ) - figname : str - the full path of the output file to write - vcolor : list, optional - an array parallel to the variables array containing background - colors for the labels to be displayed on the y-axis. - """ - from mpl_toolkits.axes_grid1 import make_axes_locatable - - # data checks - assert type(models) is type(list()) - assert type(variables) is type(list()) - assert (type(data) is type(np .empty(1)) or - type(data) is type(np.ma.empty(1))) - assert data.shape[0] == len(variables) - assert data.shape[1] == len(models ) - assert type(figname) is type("") - if vcolor is not None: - assert type(vcolor) is type(list()) - assert len(vcolor) == len(variables) - - # define some parameters - nmodels = len(models) - nvariables = len(variables) - maxV = max([len(v) for v in variables]) - maxM = max([len(m) for m in models]) - wpchar = 0.15 - wpcell = 0.19 - hpcell = 0.25 - w = maxV*wpchar + max(4,nmodels)*wpcell - if not rel_only: w += (max(4,nmodels)+1)*wpcell - h = maxM*wpchar + nvariables*hpcell + 1.0 - w = max((nmodels-3.)/(14.-3.)*(9.5-5.08)+5.08,7.) # heuristic for figure size - h = 8. - bad = 0.5 - if "stoplight" not in plt.colormaps(): RegisterCustomColormaps() - - # plot the variable scores - if rel_only: - fig,ax = plt.subplots(figsize=(w,h),ncols=1,tight_layout=True) - ax = [ax] - else: - fig,ax = plt.subplots(figsize=(w,h),ncols=2,tight_layout=True) - - # absolute score - if not rel_only: - cmap = plt.get_cmap('stoplight') - cmap.set_bad('k',bad) - qc = ax[0].pcolormesh(np.ma.masked_invalid(data[::-1,:]),cmap=cmap,vmin=0,vmax=1,linewidth=0) - div = make_axes_locatable(ax[0]) - fig.colorbar(qc, - ticks=(0,0.25,0.5,0.75,1.0), - format="%g", - cax=div.append_axes("bottom", size="5%", pad=0.05), - orientation="horizontal", - label="Variable Score") - plt.tick_params(which='both', length=0) - ax[0].xaxis.tick_top() - ax[0].set_xticks (np.arange(nmodels )+0.5) - ax[0].set_xticklabels(models,rotation=90) - ax[0].set_yticks (np.arange(nvariables)+0.5) - ax[0].set_yticklabels(variables[::-1]) - ax[0].tick_params('both',length=0,width=0,which='major') - ax[0].tick_params(axis='y',pad=10) - ax[0].set_xlim(0,nmodels) - ax[0].set_ylim(0,nvariables) - ax[0].tick_params(axis='y', pad=10) - if vcolor is not None: - for i,t in enumerate(ax[0].yaxis.get_ticklabels()): - t.set_backgroundcolor(vcolor[::-1][i]) - - # relative score - i = 0 if rel_only else 1 - np.seterr(invalid='ignore',under='ignore') - data = np.ma.masked_invalid(data) - data.data[data.mask] = 1. - data = np.ma.masked_values(data,1.) 
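    # Editorial sketch (not in the original source): the statements below turn the
    # absolute scores into per-variable Z-scores, i.e. each model is measured against
    # the spread of all models on that variable.  On made-up numbers:
    #
    #   scores = np.array([0.62, 0.71, 0.55, 0.68])            # one variable, four models
    #   z      = (scores - scores.mean()) / max(scores.std(), 0.02)
    #
    # The standard deviation is clipped from below (0.02, as in the code that follows)
    # so that variables on which all models agree do not blow up into huge Z values.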
- mean = data.mean(axis=1) - std = data.std (axis=1).clip(0.02) - np.seterr(invalid='ignore',under='ignore') - Z = (data-mean[:,np.newaxis])/std[:,np.newaxis] - Z = np.ma.masked_invalid(Z) - np.seterr(invalid='warn',under='raise') - cmap = plt.get_cmap('RdGn') - cmap.set_bad('k',bad) - qc = ax[i].pcolormesh(Z[::-1],cmap=cmap,vmin=-2,vmax=2,linewidth=0) - div = make_axes_locatable(ax[i]) - fig.colorbar(qc, - ticks=(-2,-1,0,1,2), - format="%+d", - cax=div.append_axes("bottom", size="5%", pad=0.05), - orientation="horizontal", - label="Variable Z-score") - plt.tick_params(which='both', length=0) - ax[i].xaxis.tick_top() - ax[i].set_xticks(np.arange(nmodels)+0.5) - ax[i].set_xticklabels(models,rotation=90) - ax[i].tick_params('both',length=0,width=0,which='major') - ax[i].set_yticks([]) - ax[i].set_xlim(0,nmodels) - ax[i].set_ylim(0,nvariables) - if rel_only: - ax[i].set_yticks (np.arange(nvariables)+0.5) - ax[i].set_yticklabels(variables[::-1]) - if vcolor is not None: - for i,t in enumerate(ax[i].yaxis.get_ticklabels()): - t.set_backgroundcolor(vcolor[::-1][i]) - - # save figure - fig.savefig(figname) - -def WhittakerDiagram(X,Y,Z,**keywords): - """Creates a Whittaker diagram. - - Parameters - ---------- - X : ILAMB.Variable.Variable - the first independent axis, classically representing temperature - Y : ILAMB.Variable.Variable - the second independent axis, classically representing precipitation - Z : ILAMB.Variable.Variable - the dependent axis - X_plot_unit,Y_plot_unit,Z_plot_unit : str, optional - the string representing the units of the corresponding variable - region : str, optional - the string representing the region overwhich to plot the diagram - X_min,Y_min,Z_min : float, optional - the minimum plotted value of the corresponding variable - X_max,Y_max,Z_max : float, optional - the maximum plotted value of the corresponding variable - X_label,Y_label,Z_label : str, optional - the labels of the corresponding variable - filename : str, optional - the output filename - """ - from mpl_toolkits.axes_grid1 import make_axes_locatable - - # possibly integrate in time - if X.temporal: X = X.integrateInTime(mean=True) - if Y.temporal: Y = Y.integrateInTime(mean=True) - if Z.temporal: Z = Z.integrateInTime(mean=True) - - # convert to plot units - X_plot_unit = keywords.get("X_plot_unit",X.unit) - Y_plot_unit = keywords.get("Y_plot_unit",Y.unit) - Z_plot_unit = keywords.get("Z_plot_unit",Z.unit) - if X_plot_unit is not None: X.convert(X_plot_unit) - if Y_plot_unit is not None: Y.convert(Y_plot_unit) - if Z_plot_unit is not None: Z.convert(Z_plot_unit) - - # flatten data, if any data is masked all the data is masked - mask = (X.data.mask + Y.data.mask + Z.data.mask)==0 - - # mask outside region - from constants import regions as ILAMBregions - region = keywords.get("region","global") - lats,lons = ILAMBregions[region] - mask += (np.outer((X.lat>lats[0])*(X.latlons[0])*(X.lon=lat[:-1])*(var.lat[:,np.newaxis]<=lat[1:])).argmax(axis=1) - cols = ((var.lon[:,np.newaxis]>=lon[:-1])*(var.lon[:,np.newaxis]<=lon[1:])).argmax(axis=1) - else: - # if more globally defined, nearest neighbor is fine - rows = (np.abs(lat[:,np.newaxis]-var.lat)).argmin(axis=0) - cols = (np.abs(lon[:,np.newaxis]-var.lon)).argmin(axis=0) - if var.ndata: return mask[np.ix_(rows,cols)].diagonal() - return mask[np.ix_(rows,cols)] - - def hasData(self,label,var): - """Checks if the ILAMB.Variable has data on the given region. 
- - Parameters - ---------- - label : str - the unique region identifier - var : ILAMB.Variable.Variable - the variable to which we would like check for data - - Returns - ------- - hasdata : boolean - returns True if variable has data on the given region - """ - axes = range(var.data.ndim) - if var.spatial: axes = axes[:-2] - if var.ndata : axes = axes[:-1] - keep = (var.data.mask == False).any(axis=tuple(axes)) - keep *= (self.getMask(label,var)==False) - if keep.sum() > 0: return True - return False - -if "global" not in Regions().regions: - - # Populate some regions - r = Regions() - r.addRegionLatLonBounds("global","Globe",(-89.75, 89.75),(-179.75, 179.75)) - - # GFED regions - r.addRegionLatLonBounds("bona","Boreal North America", ( 49.75, 79.75),(-170.25,- 60.25)) - r.addRegionLatLonBounds("tena","Temperate North America", ( 30.25, 49.75),(-125.25,- 66.25)) - r.addRegionLatLonBounds("ceam","Central America", ( 9.75, 30.25),(-115.25,- 80.25)) - r.addRegionLatLonBounds("nhsa","Northern Hemisphere South America",( 0.25, 12.75),(- 80.25,- 50.25)) - r.addRegionLatLonBounds("shsa","Southern Hemisphere South America",(-59.75, 0.25),(- 80.25,- 33.25)) - r.addRegionLatLonBounds("euro","Europe", ( 35.25, 70.25),(- 10.25, 30.25)) - r.addRegionLatLonBounds("mide","Middle East", ( 20.25, 40.25),(- 10.25, 60.25)) - r.addRegionLatLonBounds("nhaf","Northern Hemisphere Africa", ( 0.25, 20.25),(- 20.25, 45.25)) - r.addRegionLatLonBounds("shaf","Southern Hemisphere Africa", (-34.75, 0.25),( 10.25, 45.25)) - r.addRegionLatLonBounds("boas","Boreal Asia", ( 54.75, 70.25),( 30.25, 179.75)) - r.addRegionLatLonBounds("ceas","Central Asia", ( 30.25, 54.75),( 30.25, 142.58)) - r.addRegionLatLonBounds("seas","Southeast Asia", ( 5.25, 30.25),( 65.25, 120.25)) - r.addRegionLatLonBounds("eqas","Equatorial Asia", (-10.25, 10.25),( 99.75, 150.25)) - r.addRegionLatLonBounds("aust","Australia", (-41.25,-10.50),( 112.00, 154.00)) diff --git a/ilamb/ilamb/src/ILAMB/Scoreboard.py b/ilamb/ilamb/src/ILAMB/Scoreboard.py deleted file mode 100644 index bb8bbe0c..00000000 --- a/ilamb/ilamb/src/ILAMB/Scoreboard.py +++ /dev/null @@ -1,856 +0,0 @@ -from Confrontation import Confrontation -from ConfNBP import ConfNBP -from ConfTWSA import ConfTWSA -from ConfRunoff import ConfRunoff -from ConfEvapFraction import ConfEvapFraction -from ConfIOMB import ConfIOMB -from ConfDiurnal import ConfDiurnal -from ConfPermafrost import ConfPermafrost -import os,re -from netCDF4 import Dataset -import numpy as np -from Post import BenchmarkSummaryFigure -from ilamblib import MisplacedData - -global_print_node_string = "" -global_confrontation_list = [] -global_model_list = [] - -class Node(object): - - def __init__(self, name): - self.name = name - self.children = [] - self.parent = None - self.source = None - self.cmap = None - self.variable = None - self.alternate_vars = None - self.derived = None - self.land = False - self.confrontation = None - self.output_path = None - self.bgcolor = "#EDEDED" - self.table_unit = None - self.plot_unit = None - self.space_mean = True - self.relationships = None - self.ctype = None - self.regions = None - self.skip_rmse = False - self.skip_iav = False - self.mass_weighting = False - self.weight = 1 # if a dataset has no weight specified, it is implicitly 1 - self.sum_weight_children = 0 # what is the sum of the weights of my children? 
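        # Editorial note (not in the original source): together with `weight` above,
        # the attributes below implement the scoring weights.  `normalize_weight` is
        # later set to weight / (sum of the sibling weights) and `overall_weight` is
        # the product of the normalized weights up the tree; e.g. a dataset of weight
        # 2 among siblings totalling 4, under a variable whose normalized weight is
        # 0.25, ends up with an overall weight of 0.5 * 0.25 = 0.125.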
- self.normalize_weight = 0 # my weight relative to my siblings - self.overall_weight = 0 # the multiplication my normalized weight by all my parents' normalized weights - self.score = 0 # placeholder - - def __str__(self): - if self.parent is None: return "" - name = self.name if self.name is not None else "" - weight = self.weight - if self.isLeaf(): - s = "%s%s %s" % (" "*(self.getDepth()-1),name,self.score) - else: - s = "%s%s %s" % (" "*(self.getDepth()-1),name,self.score) - return s - - def isLeaf(self): - if len(self.children) == 0: return True - return False - - def addChild(self, node): - node.parent = self - self.children.append(node) - - def getDepth(self): - depth = 0 - parent = self.parent - while parent is not None: - depth += 1 - parent = parent.parent - return depth - -def TraversePostorder(node,visit): - for child in node.children: TraversePostorder(child,visit) - visit(node) - -def TraversePreorder(node,visit): - visit(node) - for child in node.children: TraversePreorder(child,visit) - -def PrintNode(node): - global global_print_node_string - global_print_node_string += "%s\n" % (node) - -def ConvertTypes(node): - def _to_bool(a): - if type(a) is type(True): return a - if type(a) is type("") : return a.lower() == "true" - node.weight = float(node.weight) - node.land = _to_bool(node.land) - node.space_mean = _to_bool(node.space_mean) - if node.regions is not None: node.regions = node.regions.split(",") - if node.relationships is not None: node.relationships = node.relationships.split(",") - if node.alternate_vars is not None: - node.alternate_vars = node.alternate_vars.split(",") - else: - node.alternate_vars = [] - -def SumWeightChildren(node): - for child in node.children: node.sum_weight_children += child.weight - -def NormalizeWeights(node): - if node.parent is not None: - sumw = 1. 
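    # Editorial note (not in the original source): the guard below leaves the divisor
    # at 1 when the parent's children carry no weight, avoiding a division by zero.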
- if node.parent.sum_weight_children > 0: sumw = node.parent.sum_weight_children - node.normalize_weight = node.weight/sumw - -def OverallWeights(node): - if node.isLeaf(): - node.overall_weight = node.normalize_weight - parent = node.parent - while parent.parent is not None: - node.overall_weight *= parent.normalize_weight - parent = parent.parent - -def InheritVariableNames(node): - if node.parent is None: return - if node.variable is None: node.variable = node.parent.variable - if node.derived is None: node.derived = node.parent.derived - if node.cmap is None: node.cmap = node.parent.cmap - if node.ctype is None: node.ctype = node.parent.ctype - if node.skip_rmse is False: node.skip_rmse = node.parent.skip_rmse - if node.skip_iav is False: node.skip_iav = node.parent.skip_iav - if node.mass_weighting is False: node.mass_weighting = node.parent.mass_weighting - node.alternate_vars = node.parent.alternate_vars - -def ParseScoreboardConfigureFile(filename): - root = Node(None) - previous_node = root - current_level = 0 - for line in file(filename).readlines(): - line = line.strip() - if line.startswith("#"): continue - m1 = re.search(r"\[h(\d):\s+(.*)\]",line) - m2 = re.search(r"\[(.*)\]",line) - m3 = re.search(r"(.*)=(.*)",line) - if m1: - level = int(m1.group(1)) - assert level-current_level<=1 - name = m1.group(2) - node = Node(name) - if level == current_level: - previous_node.parent.addChild(node) - elif level > current_level: - previous_node.addChild(node) - current_level = level - else: - addto = root - for i in range(level-1): addto = addto.children[-1] - addto.addChild(node) - current_level = level - previous_node = node - - if not m1 and m2: - node = Node(m2.group(1)) - previous_node.addChild(node) - - if m3: - keyword = m3.group(1).strip() - value = m3.group(2).strip().replace('"','') - #if keyword not in node.__dict__.keys(): continue - try: - node.__dict__[keyword] = value - except: - pass - - TraversePreorder (root,ConvertTypes) - TraversePostorder(root,SumWeightChildren) - TraversePreorder (root,NormalizeWeights) - TraversePreorder (root,OverallWeights) - TraversePostorder(root,InheritVariableNames) - return root - - -ConfrontationTypes = { None : Confrontation, - "ConfNBP" : ConfNBP, - "ConfTWSA" : ConfTWSA, - "ConfRunoff" : ConfRunoff, - "ConfEvapFraction": ConfEvapFraction, - "ConfIOMB" : ConfIOMB, - "ConfDiurnal" : ConfDiurnal, - "ConfPermafrost" : ConfPermafrost} - -class Scoreboard(): - """ - A class for managing confrontations - """ - def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None,rel_only=False): - - if not os.environ.has_key('ILAMB_ROOT'): - raise ValueError("You must set the environment variable 'ILAMB_ROOT'") - self.build_dir = build_dir - self.rel_only = rel_only - - if (master and not os.path.isdir(self.build_dir)): os.mkdir(self.build_dir) - - self.tree = ParseScoreboardConfigureFile(filename) - max_name_len = 45 - - def _initConfrontation(node): - if not node.isLeaf(): return - - # if the user hasn't set regions, use the globally defined ones - if node.regions is None: node.regions = regions - - # pick the confrontation to use, is it a built-in confrontation? 
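            # Editorial note (not in the original source): built-in confrontation types
            # are looked up in the ConfrontationTypes table below; any other ctype is
            # treated as an importable module of the same name, so a hypothetical
            # "ConfMyVar" on the PYTHONPATH would be resolved roughly as
            #
            #   conf = __import__("ConfMyVar"); Constructor = conf.ConfMyVar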
- if ConfrontationTypes.has_key(node.ctype): - Constructor = ConfrontationTypes[node.ctype] - else: - # try importing the confrontation - conf = __import__(node.ctype) - Constructor = conf.__dict__[node.ctype] - - try: - if node.cmap is None: node.cmap = "jet" - node.source = os.path.join(os.environ["ILAMB_ROOT"],node.source) - node.confrontation = Constructor(**(node.__dict__)) - node.confrontation.extents = extents - - if verbose and master: print (" {0:>%d}\033[92m Initialized\033[0m" % max_name_len).format(node.confrontation.longname) - - except MisplacedData: - - if (master and verbose): - longname = node.output_path - longname = longname.replace("//","/").replace(self.build_dir,"") - if longname[-1] == "/": longname = longname[:-1] - longname = "/".join(longname.split("/")[1:]) - print (" {0:>%d}\033[91m MisplacedData\033[0m" % max_name_len).format(longname) - - def _buildDirectories(node): - if node.name is None: return - path = "" - parent = node - while parent.name is not None: - path = os.path.join(parent.name.replace(" ",""),path) - parent = parent.parent - path = os.path.join(self.build_dir,path) - if not os.path.isdir(path) and master: os.mkdir(path) - node.output_path = path - - TraversePreorder(self.tree,_buildDirectories) - TraversePreorder(self.tree,_initConfrontation) - - def __str__(self): - global global_print_node_string - global_print_node_string = "" - TraversePreorder(self.tree,PrintNode) - return global_print_node_string - - def list(self): - def _hasConfrontation(node): - global global_confrontation_list - if node.confrontation is not None: - global_confrontation_list.append(node.confrontation) - global global_confrontation_list - global_confrontation_list = [] - TraversePreorder(self.tree,_hasConfrontation) - return global_confrontation_list - - def createHtml(self,M,filename="index.html"): - - # Create html assets - from pylab import imsave - arrows = np.zeros((32,16,4)) - for i in range(7): - arrows[ 4+i,(7-i):(7+i+1),3] = 1 - arrows[27-i,(7-i):(7+i+1),3] = 1 - imsave("%s/arrows.png" % self.build_dir,arrows) - - # Create a tree for relationship scores (hack) - rel_tree = GenerateRelationshipTree(self,M) - has_rel = np.asarray([len(rel.children) for rel in rel_tree.children]).sum() > 0 - nav = "" - if has_rel: - GenerateRelSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir,rel_only=self.rel_only) - nav = """ -
  • Relationship
  • """ - #global global_print_node_string - #global_print_node_string = "" - #TraversePreorder(rel_tree,PrintNode) - #print global_print_node_string - - from ILAMB.generated_version import version as ilamb_version - html = r""" - - - ILAMB Benchmark Results - - - - - - """ - html += """ - """ - html += """ - - - """ - - html += """ -
    ILAMB Benchmark Results
    ILAMB %s
    """ % (nav,ilamb_version) - - if has_rel: - html += """ -
    ILAMB Benchmark Results
    """ - - html += """ -
    ILAMB Benchmark Results
    Mean State Scores
    - - - - """ % nav - for m in M: - html += """ - """ % m.name - html += """ - - - - """ - - for tree in self.tree.children: html += GenerateTable(tree,M,self) - html += """ - -
    %s
    -
    """ - - if has_rel: - html += """ -

    Relationship Scores

    - - - - """ - for m in M: - html += """ - """ % m.name - html += """ - - - - """ - for tree in rel_tree.children: html += GenerateTable(tree,M,self,composite=False) - html += """ - -
    %s
    -
    """ - - html += """ -
    - - -""" - file("%s/%s" % (self.build_dir,filename),"w").write(html) - - def createBarCharts(self,M): - html = GenerateBarCharts(self.tree,M) - - def createSummaryFigure(self,M): - GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir,rel_only=self.rel_only) - - def dumpScores(self,M,filename): - out = file("%s/%s" % (self.build_dir,filename),"w") - out.write("Variables,%s\n" % (",".join([m.name for m in M]))) - for cat in self.tree.children: - for v in cat.children: - try: - out.write("%s,%s\n" % (v.name,','.join([str(s) for s in v.score]))) - except: - out.write("%s,%s\n" % (v.name,','.join(["~"]*len(M)))) - out.close() - -def CompositeScores(tree,M): - global global_model_list - global_model_list = M - def _loadScores(node): - if node.isLeaf(): - if node.confrontation is None: return - data = np.zeros(len(global_model_list)) - mask = np.ones (len(global_model_list),dtype=bool) - for ind,m in enumerate(global_model_list): - fname = "%s/%s_%s.nc" % (node.confrontation.output_path,node.confrontation.name,m.name) - if os.path.isfile(fname): - try: - dataset = Dataset(fname) - grp = dataset.groups["MeanState"].groups["scalars"] - except: - continue - if grp.variables.has_key("Overall Score global"): - data[ind] = grp.variables["Overall Score global"][0] - mask[ind] = 0 - else: - data[ind] = -999. - mask[ind] = 1 - node.score = np.ma.masked_array(data,mask=mask) - else: - node.score = 0 - sum_weights = 0 - for child in node.children: - node.score += child.score*child.weight - sum_weights += child.weight - np.seterr(over='ignore',under='ignore') - node.score /= sum_weights - np.seterr(over='raise',under='raise') - TraversePostorder(tree,_loadScores) - -global_html = "" -global_table_color = "" - -def DarkenRowColor(clr,fraction=0.9): - from colorsys import rgb_to_hsv,hsv_to_rgb - def hex_to_rgb(value): - value = value.lstrip('#') - lv = len(value) - rgb = tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3)) - rgb = np.asarray(rgb)/255. - return rgb - def rgb_to_hex(rgb): - return '#%02x%02x%02x' % rgb - rgb = hex_to_rgb(clr) - hsv = rgb_to_hsv(rgb[0],rgb[1],rgb[2]) - rgb = hsv_to_rgb(hsv[0],hsv[1],fraction*hsv[2]) - rgb = tuple(np.asarray(np.asarray(rgb)*255.,dtype=int)) - return rgb_to_hex(rgb) - -def BuildHTMLTable(tree,M,build_dir): - global global_model_list - global_model_list = M - def _genHTML(node): - global global_html - global global_table_color - ccolor = DarkenRowColor(global_table_color,fraction=0.95) - - # setup a html table row - if node.isLeaf(): - row = '' % ccolor - else: - row = '' % global_table_color - - # first table column - tab = '' - if node.isLeaf(): tab = '   ' - name = node.name - if node.confrontation: - conf = node.confrontation - if type(conf) == str: - path = conf.replace(build_dir,"").lstrip("/") - else: - path = os.path.join(conf.output_path.replace(build_dir,"").lstrip("/"),conf.name + ".html") - name = '%s' % (path,node.name) - if node.isLeaf(): - row += '%s%s (%.1f%%)' % (tab,name,100*node.normalize_weight) - else: - row += '%s%s' % (tab,name) - - # populate the rest of the columns - if type(node.score) != type(np.ma.empty(0)): node.score = np.ma.masked_array(np.zeros(len(global_model_list)),mask=True) - for i,m in enumerate(global_model_list): - if not node.score.mask[i]: - row += '%.2f' % node.score[i] - else: - row += '~' - - # end the table row - row += '
    ' - global_html += row - - TraversePreorder(tree,_genHTML) - -def GenerateTable(tree,M,S,composite=True): - global global_html - global global_model_list - global global_table_color - if composite: CompositeScores(tree,M) - global_model_list = M - global_table_color = tree.bgcolor - global_html = "" - for cat in tree.children: BuildHTMLTable(cat,M,S.build_dir) - return global_html - -def GenerateSummaryFigure(tree,M,filename,rel_only=False): - - models = [m.name for m in M] - variables = [] - vcolors = [] - for cat in tree.children: - for var in cat.children: - variables.append(var.name) - vcolors.append(cat.bgcolor) - - data = np.ma.zeros((len(variables),len(models))) - row = -1 - for cat in tree.children: - for var in cat.children: - row += 1 - if type(var.score) == float: - data[row,:] = np.nan - else: - data[row,:] = var.score - -def GenerateRelSummaryFigure(S,M,figname): - - def _parse(node): - global score,count,rows - if node.level != 5: return - row = "%s vs. %s" % (node.parent.parent.parent.name,node.parent.name) - col = node.name - if row not in rows: rows.append(row) - if not score .has_key(row): score[row] = {} - if not count .has_key(row): count[row] = {} - if not score[row].has_key(col): score[row][col] = 0. - if not count[row].has_key(col): count[row][col] = 0. - score[row][col] += node.score - count[row][col] += 1. - - class rnode(): - def __init__(self,name,level): - self.name = name - self.level = level - self.parent = None - self.score = None - self.children = [] - - - root = S.build_dir - tree = rnode("root",0) - previous_node = tree - current_level = 0 - - for subdir, dirs, files in os.walk(root): - if subdir == root: continue - flat = subdir.replace(root,"").lstrip("/").split("/") - level = len(flat) - name = flat[-1] - child = rnode(name,level) - if level == current_level: - child.parent = previous_node.parent - previous_node.parent.children.append(child) - if level == 3: - for fname in [f for f in files if f.endswith(".nc") and "Benchmark" not in f]: - with Dataset(os.path.join(subdir,fname)) as dset: - if "Relationships" not in dset.groups: continue - grp = dset.groups["Relationships"]["scalars"] - model = dset.name - for var in [var for var in grp.variables.keys() if ("Overall" not in var and - "global" in var)]: - rname = var.split(" ")[0] - hadrel = False - for c in child.children: - if c.name == rname: - rel = c - hadrel = True - if not hadrel: rel = rnode(rname,level+1) - mod = rnode(model,level+2) - mod.score = grp.variables[var][...] 
- mod.parent = rel - rel.children.append(mod) - rel.parent = child - if not hadrel: child.children.append(rel) - elif level > current_level: - child.parent = previous_node - previous_node.children.append(child) - current_level = level - else: - addto = tree - for i in range(level-1): addto = addto.children[-1] - child.parent = addto - addto.children.append(child) - current_level = level - previous_node = child - - global score,count,rows - score = {} - count = {} - rows = [] - TraversePreorder(tree,_parse) - models = [] - for row in rows: - for key in score[row].keys(): - if key not in models: models.append(key) - data = np.zeros((len(rows),len(models))) - BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors,rel_only=rel_only) - -def GenerateRelSummaryFigure(S,M,figname,rel_only=False): - - # reorganize the relationship data - scores = {} - counts = {} - rows = [] - vcolors = [] - for h1 in S.children: - for dep in h1.children: - dname = dep.name.split("/")[0] - for ind in dep.children: - iname = ind.name.split("/")[0] - key = "%s/%s" % (dname,iname) - if scores.has_key(key): - scores[key] += ind.score - counts[key] += 1. - else: - scores[key] = np.copy(ind.score) - counts[key] = 1. - rows .append(key) - vcolors.append(h1.bgcolor) - if len(rows) == 0: return - data = np.ma.zeros((len(rows),len(M))) - for i,row in enumerate(rows): - data[i,:] = scores[row] / counts[row] - BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) - -def GenerateRelSummaryFigure(S,M,figname): - - def _parse(node): - global score,count,rows - if node.level != 5: return - row = "%s vs. %s" % (node.parent.parent.parent.name,node.parent.name) - col = node.name - if row not in rows: rows.append(row) - if not score .has_key(row): score[row] = {} - if not count .has_key(row): count[row] = {} - if not score[row].has_key(col): score[row][col] = 0. - if not count[row].has_key(col): count[row][col] = 0. - score[row][col] += node.score - count[row][col] += 1. - - class rnode(): - def __init__(self,name,level): - self.name = name - self.level = level - self.parent = None - self.score = None - self.children = [] - - - root = S.build_dir - tree = rnode("root",0) - previous_node = tree - current_level = 0 - - for subdir, dirs, files in os.walk(root): - if subdir == root: continue - flat = subdir.replace(root,"").lstrip("/").split("/") - level = len(flat) - name = flat[-1] - child = rnode(name,level) - if level == current_level: - child.parent = previous_node.parent - previous_node.parent.children.append(child) - if level == 3: - for fname in [f for f in files if f.endswith(".nc") and "Benchmark" not in f]: - with Dataset(os.path.join(subdir,fname)) as dset: - if "Relationships" not in dset.groups: continue - grp = dset.groups["Relationships"]["scalars"] - model = dset.name - for var in [var for var in grp.variables.keys() if ("Overall" not in var and - "global" in var)]: - rname = var.split(" ")[0] - hadrel = False - for c in child.children: - if c.name == rname: - rel = c - hadrel = True - if not hadrel: rel = rnode(rname,level+1) - mod = rnode(model,level+2) - mod.score = grp.variables[var][...] 
- mod.parent = rel - rel.children.append(mod) - rel.parent = child - if not hadrel: child.children.append(rel) - elif level > current_level: - child.parent = previous_node - previous_node.children.append(child) - current_level = level - else: - addto = tree - for i in range(level-1): addto = addto.children[-1] - child.parent = addto - addto.children.append(child) - current_level = level - previous_node = child - - global score,count,rows - score = {} - count = {} - rows = [] - TraversePreorder(tree,_parse) - models = [] - for row in rows: - for key in score[row].keys(): - if key not in models: models.append(key) - data = np.zeros((len(rows),len(models))) - for i,row in enumerate(rows): - data[i,:] = scores[row] / counts[row] - BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) - -def GenerateRelationshipTree(S,M): - - # Create a tree which mimics the scoreboard for relationships, but - # we need - # - # root -> category -> datasets -> relationships - # - # instead of - # - # root -> category -> variable -> datasets - # - rel_tree = Node("root") - for cat in S.tree.children: - h1 = Node(cat.name) - h1.bgcolor = cat.bgcolor - h1.parent = rel_tree - rel_tree.children.append(h1) - for var in cat.children: - for data in var.children: - if data is None: continue - if data.relationships is None: continue - - # build tree - h2 = Node(data.confrontation.longname) - h1.children.append(h2) - h2.parent = h1 - h2.score = np.ma.masked_array(np.zeros(len(M)),mask=True) - for rel in data.relationships: - try: - longname = rel.longname - except: - longname = rel - v = Node(longname) - h2.children.append(v) - v.parent = h2 - v.score = np.ma.masked_array(np.zeros(len(M)),mask=True) - v.normalize_weight = 1./len(data.relationships) - path = data.confrontation.output_path - path = os.path.join(path,data.confrontation.name + ".html#Relationships") - v.confrontation = path - - # load scores - for i,m in enumerate(M): - fname = os.path.join(data.output_path,"%s_%s.nc" % (data.name,m.name)) - if not os.path.isfile(fname): continue - with Dataset(fname) as dset: - grp = dset.groups["Relationships"]["scalars"] - for rel,v in zip(data.relationships,h2.children): - try: - longname = rel.longname - except: - longname = rel - rs = [key for key in grp.variables.keys() if (longname.split("/")[0] in key and - "global" in key and - "RMSE" in key)] - if len(rs) != 1: continue - v.score[i] = grp.variables[rs[0]][...] - if "Overall Score global" not in grp.variables.keys(): continue - h2.score[i] = grp.variables["Overall Score global"][...] - - return rel_tree - - diff --git a/ilamb/ilamb/src/ILAMB/Variable.py b/ilamb/ilamb/src/ILAMB/Variable.py deleted file mode 100644 index a39f9de3..00000000 --- a/ilamb/ilamb/src/ILAMB/Variable.py +++ /dev/null @@ -1,1761 +0,0 @@ -from constants import spd,dpy,mid_months,bnd_months -from Regions import Regions -from mpl_toolkits.basemap import Basemap -import matplotlib.colors as colors -from pylab import get_cmap -from cf_units import Unit -import ilamblib as il -import Post as post -import numpy as np - -def _shiftLon(lon): - return (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360 - -class Variable: - r"""A class for managing variables and their analysis. - - There are two ways to create a Variable object. Because python - does not support multiple constructors, we will use keyword - arguments so that the users intent may be captured. The first way - to specify a Variable is by loading a netCDF4 file. 
You can - achieve this by specifying the 'filename' and 'variable_name' - keywords. The second way is to use the remaining keyword arguments - to specify data arrays directly. If you use the second way, you - must specify the keywords 'data' and 'unit'. The rest are truly - optional and depend on the nature of your data. - - Parameters - ---------- - filename : str, optional - Name of the netCDF4 file from which to extract a variable - variable_name : str, optional - Name of the variable to extract from the netCDF4 file - data : numpy.ndarray, optional - The array which contains the data which constitutes the - variable - unit : str, optional - The unit of the input data - name : str, optional - The name of the variable, will be how it is saved in the netCDF4 - file - time : numpy.ndarray, optional - a 1D array of times in days since 1850-01-01 00:00:00 - time_bnds : numpy.ndarray, optional - a 2D array of time bounds in days since 1850-01-01 00:00:00 - lat : numpy.ndarray, optional - a 1D array of latitudes of cell centroids - lon : numpy.ndarray, optional - a 1D array of longitudes of cell centroids - area : numpy.ndarray, optional - a 2D array of the cell areas - ndata : int, optional - number of data sites this data represents - alternate_vars : list of str, optional - a list of alternate acceptable variable names - depth_bnds : numpy.ndarray, optional - a 2D array representing the boundaries of the cells in the vertical dimension - - Examples - -------- - - You can initiate a Variable by specifying the data directly. - - >>> lat = np.linspace(- 90, 90, 91) - >>> lon = np.linspace(-180,180,181) - >>> data = np.random.rand(91,181) - >>> v = Variable(name="some_variable",unit="some_unit",lat=lat,lon=lon,data=data) - - Or you can initiate a variable by extracting a specific field from a netCDF4 file. - - >>> v = Variable(filename="some_netcdf_file.nc",variable_name="name_of_var_to_extract") - - """ - def __init__(self,**keywords): - r"""Constructor for the variable class by specifying the data arrays. - """ - # See if the user specified a netCDF4 file and variable - filename = keywords.get("filename" ,None) - groupname = keywords.get("groupname" ,None) - variable_name = keywords.get("variable_name",None) - alternate_vars = keywords.get("alternate_vars",[]) - if filename is None: # if not pull data from other arguments - data = keywords.get("data" ,None) - unit = keywords.get("unit" ,None) - name = keywords.get("name" ,"unnamed") - time = keywords.get("time" ,None) - time_bnds = keywords.get("time_bnds" ,None) - lat = keywords.get("lat" ,None) - lat_bnds = keywords.get("lat_bnds" ,None) - lon = keywords.get("lon" ,None) - lon_bnds = keywords.get("lon_bnds" ,None) - depth = keywords.get("depth" ,None) - depth_bnds = keywords.get("depth_bnds" ,None) - ndata = keywords.get("ndata" ,None) - assert data is not None - assert unit is not None - cbounds = None - else: - assert variable_name is not None - t0 = keywords.get("t0",None) - tf = keywords.get("tf",None) - out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname) - data,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata = out - - if not np.ma.isMaskedArray(data): data = np.ma.masked_array(data) - self.data = data - self.ndata = ndata - self.unit = unit - self.name = name - self.cbounds = cbounds - - def _createBnds(x): - x = np.asarray(x) - x_bnds = np.zeros((x.size,2)) - x_bnds[+1:,0] = 0.5*(x[:-1]+x[+1:]) - x_bnds[:-1,1] = 0.5*(x[:-1]+x[+1:]) - if x.size == 1: - x_bnds[ ...] 
= x - else: - x_bnds[ 0,0] = x[ 0] - 0.5*(x[ 1]-x[ 0]) - x_bnds[-1,1] = x[-1] + 0.5*(x[-1]-x[-2]) - return x_bnds - - # Handle time data - self.time = time # time data - self.time_bnds = time_bnds # bounds on time - self.temporal = False # flag for temporal data - self.dt = 0. # mean temporal spacing - self.monthly = False # flag for monthly means - if time is not None: - self.temporal = True - if self.time_bnds is None: self.time_bnds = _createBnds(self.time) - self.dt = (self.time_bnds[:,1]-self.time_bnds[:,0]).mean() - if np.allclose(self.dt,30,atol=3): self.monthly = True - assert (2*self.time.size) == (self.time_bnds.size) - - # Handle space or multimember data - self.spatial = False - self.lat = lat - self.lon = lon - self.lat_bnds = lat_bnds - self.lon_bnds = lon_bnds - self.area = keywords.get("area",None) - - # Shift possible values on [0,360] to [-180,180] - if self.lon is not None: self.lon = _shiftLon(self.lon ) - if self.lon_bnds is not None: self.lon_bnds = _shiftLon(self.lon_bnds) - - # If the last dimensions are lat and lon, this is spatial data - if lat is not None and lon is not None and data.ndim >= 2: - if (data.shape[-2] == lat.size and data.shape[-1] == lon.size): self.spatial = True - - if self.spatial is True: - if np.all(np.diff(self.lat)<0): # Flip if monotonically decreasing - self.lat = self.lat [::-1 ] - self.data = self.data[...,::-1,: ] - if self.lat_bnds is not None: self.lat_bnds = self.lat_bnds[::-1,::-1] - if self.area is not None: self.area = self.area [::-1,:] - if self.lat_bnds is None: self.lat_bnds = _createBnds(self.lat) - if self.lon_bnds is None: self.lon_bnds = _createBnds(self.lon) - if self.area is None: self.area = il.CellAreas(self.lat,self.lon) - # Some data arrays are arranged such that the first column - # of data is arranged at the prime meridian. This does not - # work well with some of the plotting and/or analysis - # operations we will need to perform. These require that - # the first column be coincident with the international - # dateline. Thus we roll the data the required amount. - shift = self.lon.argmin() - self.lon = np.roll(self.lon ,-shift) - self.lon_bnds = np.roll(self.lon_bnds,-shift,axis= 0) - self.data = np.roll(self.data ,-shift,axis=-1) - self.area = np.roll(self.area ,-shift,axis=-1) - # Fix potential problems with rolling the axes of the lon_bnds - if self.lon_bnds[ 0,0] > self.lon_bnds[ 0,1]: self.lon_bnds[ 0,0] = -180. - if self.lon_bnds[-1,0] > self.lon_bnds[-1,1]: self.lon_bnds[-1,1] = +180. 
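            # Editorial sketch (not in the original source): the longitude convention
            # used above, on made-up values -- coordinates on [0,360] are shifted to
            # [-180,180] and all arrays are rolled so the first column sits at the
            # international dateline:
            #
            #   lon = np.array([0., 90., 180., 270.])        # hypothetical grid
            #   lon = np.where(lon > 180., lon - 360., lon)  # -> [0, 90, 180, -90]
            #   lon = np.roll(lon, -lon.argmin())            # -> [-90, 0, 90, 180]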
- # Make sure that the value lies within the bounds - assert np.all((self.lat>=self.lat_bnds[:,0])*(self.lat<=self.lat_bnds[:,1])) - assert np.all((self.lon>=self.lon_bnds[:,0])*(self.lon<=self.lon_bnds[:,1])) - - # Is the data layered - self.layered = False - self.depth = depth - self.depth_bnds = depth_bnds - if (data.ndim > (self.temporal + 2*self.spatial + (self.ndata is not None))) and depth is not None: - self.layered = True - if depth_bnds is None: self.depth_bnds = _createBnds(self.depth) - - def __str__(self): - if self.data is None: return "Uninitialized Variable" - if self.ndata is None: - ndata = "N/A" - else: - ndata = str(self.ndata) - if not self.temporal: - time = "" - else: - time = " (%d)" % self.time.size - if not self.spatial: - space = "" - else: - space = " (%d,%d)" % (self.lat.size,self.lon.size) - if not self.layered: - layer = "" - else: - layer = " (%d)" % (self.depth.size) - s = "Variable: %s\n" % self.name - s += "-"*(len(self.name)+10) + "\n" - s += "{0:>20}: ".format("unit") + self.unit + "\n" - s += "{0:>20}: ".format("isTemporal") + str(self.temporal) + time + "\n" - s += "{0:>20}: ".format("isSpatial") + str(self.spatial) + space + "\n" - s += "{0:>20}: ".format("isLayered") + str(self.layered) + layer + "\n" - s += "{0:>20}: ".format("nDatasites") + ndata + "\n" - s += "{0:>20}: ".format("dataShape") + "%s\n" % (self.data.shape,) - np.seterr(over='ignore',under='ignore') - s += "{0:>20}: ".format("dataMax") + "%e\n" % self.data.max() - s += "{0:>20}: ".format("dataMin") + "%e\n" % self.data.min() - s += "{0:>20}: ".format("dataMean") + "%e\n" % self.data.mean() - np.seterr(over='warn',under='warn') - if self.cbounds is not None: - s += "{0:>20}: ".format("climatology") + "%d thru %d\n" % (self.cbounds[0],self.cbounds[1]) - - return s - - def nbytes(self): - r"""Estimate the memory usage of a variable in bytes. - """ - nbytes = 0. - for key in self.__dict__.keys(): - try: - nbytes += self.__dict__[key].nbytes - except: - pass - return nbytes - - def integrateInTime(self,**keywords): - r"""Integrates the variable over a given time period. - - Uses nodal integration to integrate to approximate - - .. math:: \int_{t_0}^{t_f} v(t,\dots)\ dt - - The arguments of the integrand reflect that while it must be - at least defined in time, the remaining arguments are - flexible. If :math:`t_0` or :math:`t_f` are not specified, the - variable will be integrated over the extent of its time - domain. If the mean function value over time is desired, this - routine will approximate - - .. math:: \frac{1}{t_f-t_0} \int_{t_0}^{t_f} v(t,\dots)\ dt - - again by nodal integration. The amount of time which we divide - by is the non-masked amount of time. This means that if a - function has some values masked or marked as invalid, we do - not penalize the average value by including this as a time at - which data is expected. 
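        For example, if two months of a twelve-month record are masked, the mean
        divides by the duration of the ten unmasked months rather than by the full
        year.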
- - Parameters - ---------- - t0 : float, optional - initial time in days since 1/1/1850 - tf : float, optional - final time in days since 1/1/1850 - mean : boolean, optional - enable to divide the integrand to get the mean function value - - Returns - ------- - integral : ILAMB.Variable.Variable - a Variable instance with the integrated value along with the - appropriate name and unit change - - """ - if not self.temporal: raise il.NotTemporalVariable() - t0 = keywords.get("t0",self.time_bnds[:,0].min()) - tf = keywords.get("tf",self.time_bnds[:,1].max()) - mean = keywords.get("mean",False) - - # find which time bounds are included even partially in the interval [t0,tf] - time_bnds = np.copy(self.time_bnds) - ind = np.where((t0time_bnds[:,0]))[0] - time_bnds[(t0>time_bnds[:,0])*(t0time_bnds[:,0])*(tf 1 and self.data.mask.size > 1: - mask = np.apply_along_axis(np.all,0,self.data.mask[ind]) - integral = np.ma.masked_array(integral,mask=mask,copy=False) - - # handle units - unit = Unit(self.unit) - name = self.name + "_integrated_over_time" - - if mean: - - # divide thru by the non-masked amount of time, the units - # can remain as input because we integrate over time and - # then divide by the time interval in the same units - name += "_and_divided_by_time_period" - if self.data.mask.size > 1: - dt = (dt*(self.data.mask[ind]==0)).sum(axis=0) - else: - dt = dt.sum(axis=0) - np.seterr(over='ignore',under='ignore') - integral = integral / dt - np.seterr(over='raise' ,under='raise' ) - - else: - - # if not a mean, we need to potentially handle unit conversions - unit0 = Unit("d")*unit - unit = Unit(unit0.format().split()[-1]) - integral = unit0.convert(integral,unit) - - return Variable(data = integral, - unit = "%s" % unit, - name = name, - lat = self.lat, - lat_bnds = self.lat_bnds, - lon = self.lon, - lon_bnds = self.lon_bnds, - depth = self.depth, - depth_bnds = self.depth_bnds, - area = self.area, - ndata = self.ndata) - - def integrateInDepth(self,**keywords): - r"""Integrates the variable over a given layer limits. - - Uses nodal integration to integrate to approximate - - .. math:: \int_{z_0}^{z_f} v(z,\dots)\ dz - - The arguments of the integrand reflect that while it must be - at least defined in depth, the remaining arguments are - flexible. If :math:`z_0` or :math:`z_f` are not specified, the - variable will be integrated over the extent of its depth - domain. If the mean function value over depth is desired, this - routine will approximate - - .. math:: \frac{1}{z_f-z_0} \int_{z_0}^{z_f} v(z,\dots)\ dz - - again by nodal integration. The amount of depth which we - divide by is the non-masked amount of depth. This means that - if a function has some values masked or marked as invalid, we - do not penalize the average value by including this as a depth - at which data is expected. 
- - Parameters - ---------- - z0 : float, optional - initial depth in m - zf : float, optional - final depth in m - mean : boolean, optional - enable to divide the integrand to get the mean function value - - Returns - ------- - integral : ILAMB.Variable.Variable - a Variable instance with the integrated value along with the - appropriate name and unit change - - """ - if not self.layered: raise il.NotLayeredVariable() - z0 = keywords.get("z0",self.depth_bnds[:,0].min()) - zf = keywords.get("zf",self.depth_bnds[:,1].max()) - mean = keywords.get("mean",False) - - # find which time bounds are included even partially in the interval [z0,zf] - depth_bnds = np.copy(self.depth_bnds) - ind = np.where((z0depth_bnds[:,0]))[0] - depth_bnds[(z0>depth_bnds[:,0])*(z0depth_bnds[:,0])*(zf 1 and self.data.mask.size > 1: - mask = np.apply_along_axis(np.all,axis,self.data.mask[ind]) - integral = np.ma.masked_array(integral,mask=mask,copy=False) - - # handle units - unit = Unit(self.unit) - name = self.name + "_integrated_over_depth" - - if mean: - - # divide thru by the non-masked amount of time, the units - # can remain as input because we integrate over time and - # then divide by the time interval in the same units - name += "_and_divided_by_depth" - if self.data.mask.size > 1: - dz = (dz*(self.data.mask[ind]==0)).sum(axis=axis) - else: - dz = dz.sum(axis=axis) - np.seterr(over='ignore',under='ignore') - integral = integral / dz - np.seterr(over='raise' ,under='raise' ) - - else: - - # if not a mean, we need to potentially handle unit conversions - unit0 = Unit("m")*unit - unit = Unit(unit0.format().split()[-1]) - integral = unit0.convert(integral,unit) - - return Variable(data = integral, - unit = "%s" % unit, - name = name, - time = self.time, - time_bnds = self.time_bnds, - lat = self.lat, - lat_bnds = self.lat_bnds, - lon = self.lon, - lon_bnds = self.lon_bnds, - area = self.area, - ndata = self.ndata) - - def integrateInSpace(self,region=None,mean=False,weight=None,intabs=False): - r"""Integrates the variable over a given region. - - Uses nodal integration to integrate to approximate - - .. math:: \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega - - The arguments of the integrand reflect that while it must be - at least defined in space, the remaining arguments are - flexible. The variable :math:`\Omega` represents the desired - region over which we will integrate. If no region is - specified, the variable will be integrated over the extent of - its spatial domain. If the mean function value over time is - desired, this routine will approximate - - .. math:: \frac{1}{A(\Omega)} \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega - - again by nodal integration. The spatial area which we divide - by :math:`A(\Omega)` is the non-masked area of the given - region, also given by - - .. math:: A(\Omega) = \int_{\Omega}\ d\Omega - - This means that if a function has some values masked or marked - as invalid, we do not penalize the average value by including - this as a point at which data is expected. - - We also support the inclusion of an optional weighting - function :math:`w(\mathbf{x})` which is a function of space - only. In this case, we approximate the following integral - - .. math:: \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega - - and if a mean value is desired, - - .. 
math:: \frac{1}{\int_{\Omega} w(\mathbf{x})\ d\Omega} \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega - - Parameters - ---------- - region : str, optional - name of the region overwhich you wish to integrate - mean : bool, optional - enable to divide the integrand to get the mean function value - weight : numpy.ndarray, optional - a data array of the same shape as this variable's areas - representing an additional weight in the integrand - intabs : bool, optional - enable to integrate the absolute value - - Returns - ------- - integral : ILAMB.Variable.Variable - a Variable instace with the integrated value along with the - appropriate name and unit change. - - """ - def _integrate(var,areas): - op = lambda x : x - if intabs: op = np.abs - assert var.shape[-2:] == areas.shape - np.seterr(over='ignore',under='ignore') - vbar = (op(var)*areas).sum(axis=-1).sum(axis=-1) - np.seterr(over='raise',under='raise') - return vbar - - if not self.spatial: raise il.NotSpatialVariable() - - # determine the measure - mask = self.data.mask - while mask.ndim > 2: mask = np.all(mask,axis=0) - measure = np.ma.masked_array(self.area,mask=mask,copy=True) - if weight is not None: measure *= weight - - # if we want to integrate over a region, we need add to the - # measure's mask - r = Regions() - if region is not None: measure.mask += r.getMask(region,self) - - # approximate the integral - integral = _integrate(self.data,measure) - if mean: - np.seterr(under='ignore') - integral = integral / measure.sum() - np.seterr(under='raise') - - # handle the name and unit - name = self.name + "_integrated_over_space" - if region is not None: name = name.replace("space",region) - unit = Unit(self.unit) - if mean: - - # we have already divided thru by the non-masked area in - # units of m^2, which are the same units of the integrand. - name += "_and_divided_by_area" - else: - - # if not a mean, we need to potentially handle unit conversions - unit0 = Unit("m2")*unit - unit = Unit(unit0.format().split()[-1]) - integral = unit0.convert(integral,unit) - - return Variable(data = np.ma.masked_array(integral), - unit = "%s" % unit, - time = self.time, - time_bnds = self.time_bnds, - depth = self.depth, - depth_bnds = self.depth_bnds, - name = name) - - def siteStats(self,region=None,weight=None,intabs=False): - """Computes the mean and standard deviation of the variable over all data sites. - - Parameters - ---------- - region : str, optional - name of the region overwhich you wish to include stats. - - Returns - ------- - mean : ILAMB.Variable.Variable - a Variable instace with the mean values - - """ - if self.ndata is None: raise il.NotDatasiteVariable() - op = lambda x : x - if intabs: op = np.abs - rem_mask = np.copy(self.data.mask) - rname = "" - r = Regions() - if region is not None: - self.data.mask += r.getMask(region,self) - rname = "_over_%s" % region - np.seterr(over='ignore',under='ignore') - mean = np.ma.average(op(self.data),axis=-1,weights=weight) - np.seterr(over='raise',under='raise') - self.data.mask = rem_mask - return Variable(data = mean, - unit = self.unit, - time = self.time, - time_bnds = self.time_bnds, - depth = self.depth, - depth_bnds = self.depth_bnds, - name = "mean_%s%s" % (self.name,rname)) - - def annualCycle(self): - """Computes mean annual cycle information (climatology) for the variable. - - For each site/cell/depth in the variable, compute the mean annual cycle. 
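Schematically, and assuming the monthly series already starts in January and spans whole years, the climatology is obtained by reshaping the data to (years, 12, ...) and averaging over the year axis. A minimal sketch with hypothetical data:

    import numpy as np

    monthly = np.arange(36, dtype=float)              # hypothetical: 3 years of monthly values
    cycle   = monthly.reshape((-1, 12)).mean(axis=0)  # mean annual cycle, one value per month
    # cycle -> [12., 13., ..., 23.]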
- - Returns - ------- - mean : ILAMB.Variable.Variable - The annual cycle mean values - """ - if not self.temporal: raise il.NotTemporalVariable() - assert self.monthly - assert self.time.size > 11 - begin = np.argmin(self.time[:11]%365) - end = begin+int(self.time[begin:].size/12.)*12 - shp = (-1,12) + self.data.shape[1:] - v = self.data[begin:end,...].reshape(shp) - np.seterr(over='ignore',under='ignore') - mean = v.mean(axis=0) - np.seterr(over='raise',under='raise') - return Variable(data = mean, - unit = self.unit, - name = "annual_cycle_mean_of_%s" % self.name, - time = mid_months, - time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, - lat = self.lat, - lat_bnds = self.lat_bnds, - lon = self.lon, - lon_bnds = self.lon_bnds, - area = self.area, - depth = self.depth, - depth_bnds = self.depth_bnds, - ndata = self.ndata) - - def timeOfExtrema(self,etype="max"): - """Returns the time of the specified extrema. - - Parameters - ---------- - etype : str, optional - The type of extrema to compute, either 'max' or 'min' - - Returns - ------- - extrema : ILAMB.Variable.Variable - The times of the extrema computed - """ - if not self.temporal: raise il.NotTemporalVariable() - fcn = {"max":np.argmax,"min":np.argmin} - assert etype in fcn.keys() - tid = np.apply_along_axis(fcn[etype],0,self.data) - mask = False - if self.data.ndim > 1 and self.data.mask.ndim > 0: mask = np.apply_along_axis(np.all,0,self.data.mask) # mask cells where all data is masked - data = np.ma.masked_array(self.time[tid],mask=mask) - return Variable(data = data, - unit = "d", - name = "time_of_%s_%s" % (etype,self.name), - lat = self.lat, - lat_bnds = self.lat_bnds, - lon = self.lon, - lon_bnds = self.lon_bnds, - area = self.area, - depth = self.depth, - depth_bnds = self.depth_bnds, - ndata = self.ndata) - - def extractDatasites(self,lat,lon): - """Extracts a variable at sites defined by a set of latitude and longitude. - - Parameters - ---------- - lat : numpy.ndarray - an array with the latitude values, must be same size as the longitude values - lon : numpy.ndarray - an array with the longitude values, must be same size as the latitude values - - Returns - ------- - extracted : ILAMB.Variable.Variable - The extracted variables - """ - assert lat.size == lon.size - if not self.spatial: raise il.NotSpatialVariable() - ilat = np.apply_along_axis(np.argmin,1,np.abs(lat[:,np.newaxis]-self.lat)) - ilon = np.apply_along_axis(np.argmin,1,np.abs(lon[:,np.newaxis]-self.lon)) - ndata = lat.size - if self.data.ndim == 2: - data = self.data[ ilat,ilon] - else: - data = self.data[...,ilat,ilon] - return Variable(data = data, - unit = self.unit, - name = self.name, - lat = lat, - lon = lon, - ndata = ndata, - depth = self.depth, - depth_bnds = self.depth_bnds, - time = self.time, - time_bnds = self.time_bnds) - - def spatialDifference(self,var): - """Computes the point-wise difference of two spatially defined variables. - - If the variable is spatial or site data and is defined on the - same grid, this routine will simply compute the difference in - the data arrays. If the variables are spatial but defined on - separate grids, the routine will interpolate both variables to - a composed grid via nearest-neighbor interpolation and then - return the difference. 
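The composed-grid case amounts to a nearest-neighbor lookup of both fields on a shared set of points before subtracting. A small 1D illustration of that idea (hypothetical grids and values, not the TrueError routine itself):

    import numpy as np

    # hypothetical 1D fields on two different latitude grids
    lat1, f1 = np.array([-45., 0., 45.]),        np.array([1., 2., 3.])
    lat2, f2 = np.array([-60., -20., 20., 60.]), np.array([0.5, 1.5, 2.5, 3.5])

    lat = np.union1d(lat1, lat2)                      # composite grid
    i1  = np.abs(lat[:, None] - lat1).argmin(axis=1)  # nearest-neighbor index into grid 1
    i2  = np.abs(lat[:, None] - lat2).argmin(axis=1)  # nearest-neighbor index into grid 2
    error = f2[i2] - f1[i1]                           # pointwise difference on the composite grid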
- - Parameters - ---------- - var : ILAMB.Variable.Variable - The variable we wish to compare against this variable - - Returns - ------- - diff : ILAMB.Variable.Variable - A new variable object representing the difference - """ - def _make_bnds(x): - bnds = np.zeros(x.size+1) - bnds[1:-1] = 0.5*(x[1:]+x[:-1]) - bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) - bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) - return bnds - assert Unit(var.unit) == Unit(self.unit) - assert self.temporal == False - assert self.ndata == var.ndata - assert self.layered == False - # Perform a check on the spatial grid. If it is the exact same - # grid, there is no need to interpolate. - same_grid = False - try: - same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) - except: - pass - - if same_grid: - error = np.ma.masked_array(var.data-self.data,mask=self.data.mask+var.data.mask) - diff = Variable(data = error, - unit = var.unit, - lat = var.lat, - lat_bnds = var.lat_bnds, - lon = var.lon, - lon_bnds = var.lon_bnds, - ndata = var.ndata, - name = "%s_minus_%s" % (var.name,self.name)) - else: - if not self.spatial: raise il.NotSpatialVariable() - lat_bnd1 = _make_bnds(self.lat) - lon_bnd1 = _make_bnds(self.lon) - lat_bnd2 = _make_bnds( var.lat) - lon_bnd2 = _make_bnds( var.lon) - lat_bnd,lon_bnd,lat,lon,error = il.TrueError(lat_bnd1,lon_bnd1,self.lat,self.lon,self.data, - lat_bnd2,lon_bnd2, var.lat, var.lon, var.data) - diff = Variable(data = error, - unit = var.unit, - lat = lat, - lat_bnd = lat_bnd, - lon = lon, - lon_bnd = lon_bnd, - name = "%s_minus_%s" % (var.name,self.name)) - return diff - - def convert(self,unit,density=998.2): - """Convert the variable to a given unit. - - We use the UDUNITS library via the cf_units python interface to - convert the variable's unit. Additional support is provided - for unit conversions in which substance information is - required. For example, in quantities such as precipitation it - is common to have data in the form of a mass rate per unit - area [kg s-1 m-2] yet desire it in a linear rate [m s-1]. This - can be accomplished if the density of the substance is - known. We assume here that water is the substance, but this - can be changed by specifying the density when calling the - function. - - Parameters - ---------- - unit : str - the desired converted unit - density : float, optional - the mass density in [kg m-3] to use when converting linear - rates to area density rates - - Returns - ------- - self : ILAMB.Variable.Variable - this object with its unit converted - - """ - if unit is None: return self - src_unit = Unit(self.unit) - tar_unit = Unit( unit) - mask = self.data.mask - - # Define some generic quantities - linear = Unit("m") - linear_rate = Unit("m s-1") - area_density = Unit("kg m-2") - area_density_rate = Unit("kg m-2 s-1") - mass_density = Unit("kg m-3") - volume_conc = Unit("mol m-3") - mass_conc = Unit("mol kg-1") - - # UDUNITS doesn't handle frequently found temperature expressions - synonyms = {"K":"degK", - "R":"degR", - "C":"degC", - "F":"degF"} - for syn in synonyms.keys(): - if src_unit.format() == syn: src_unit = Unit(synonyms[syn]) - if tar_unit.format() == syn: tar_unit = Unit(synonyms[syn]) - - # Do we need to multiply by density? 
- if ( (src_unit.is_convertible(linear_rate) and tar_unit.is_convertible(area_density_rate)) or - (src_unit.is_convertible(linear ) and tar_unit.is_convertible(area_density )) or - (src_unit.is_convertible(mass_conc ) and tar_unit.is_convertible(volume_conc )) ): - np.seterr(over='ignore',under='ignore') - self.data *= density - np.seterr(over='raise',under='raise') - src_unit *= mass_density - - # Do we need to divide by density? - if ( (tar_unit.is_convertible(linear_rate) and src_unit.is_convertible(area_density_rate)) or - (tar_unit.is_convertible(linear ) and src_unit.is_convertible(area_density )) or - (tar_unit.is_convertible(mass_conc ) and src_unit.is_convertible(volume_conc )) ): - np.seterr(over='ignore',under='ignore') - self.data = self.data / density - np.seterr(over='raise',under='raise') - src_unit = src_unit / mass_density - - # Convert units - try: - self.data = src_unit.convert(self.data,tar_unit) - self.data = np.ma.masked_array(self.data,mask=mask) - self.unit = unit - except: - raise il.UnitConversionError() - return self - - def toNetCDF4(self,dataset,attributes=None,group=None): - """Adds the variable to the specified netCDF4 dataset. - - Parameters - ---------- - dataset : netCDF4.Dataset - a dataset into which you wish to save this variable - attributes : dict of scalars, optional - a dictionary of additional scalars to encode as ncattrs - group : str, optional - the name of the netCDF4 group to to which we add this variable - """ - def _checkTime(t,dset): - """A local function for ensuring the time dimension is saved in the dataset.""" - time_name = "time" - while True: - if time_name in dset.dimensions.keys(): - if (t.shape == dset.variables[time_name][...].shape and - np.allclose(t,dset.variables[time_name][...],atol=0.5*self.dt)): - return time_name - else: - time_name += "_" - else: - dset.createDimension(time_name) - T = dset.createVariable(time_name,"double",(time_name)) - T.setncattr("units","days since 1850-01-01 00:00:00") - T.setncattr("calendar","noleap") - T.setncattr("axis","T") - T.setncattr("long_name","time") - T.setncattr("standard_name","time") - T[...] = t - if self.time_bnds is not None: - bnd_name = time_name.replace("time","time_bnds") - T.setncattr("bounds",bnd_name) - if "nb" not in dset.dimensions.keys(): - D = dset.createDimension("nb",size=2) - if bnd_name not in dset.variables.keys(): - B = dset.createVariable(bnd_name,"double",(time_name,"nb")) - B.setncattr("units","days since 1850-01-01 00:00:00") - B[...] = self.time_bnds - return time_name - - def _checkLat(lat,dset): - """A local function for ensuring the lat dimension is saved in the dataset.""" - lat_name = "lat" - while True: - if lat_name in dset.dimensions.keys(): - if (lat.shape == dset.variables[lat_name][...].shape and - np.allclose(lat,dset.variables[lat_name][...])): - return lat_name - else: - lat_name += "_" - else: - dset.createDimension(lat_name,size=lat.size) - Y = dset.createVariable(lat_name,"double",(lat_name)) - Y.setncattr("units","degrees_north") - Y.setncattr("axis","Y") - Y.setncattr("long_name","latitude") - Y.setncattr("standard_name","latitude") - Y[...] = lat - if self.lat_bnds is not None: - bnd_name = lat_name.replace("lat","lat_bnds") - Y.setncattr("bounds",bnd_name) - if "nb" not in dset.dimensions.keys(): - D = dset.createDimension("nb",size=2) - if bnd_name not in dset.variables.keys(): - B = dset.createVariable(bnd_name,"double",(lat_name,"nb")) - B.setncattr("units","degrees_north") - B[...] 
= self.lat_bnds - return lat_name - - def _checkLon(lon,dset): - """A local function for ensuring the lon dimension is saved in the dataset.""" - lon_name = "lon" - while True: - if lon_name in dset.dimensions.keys(): - if (lon.shape == dset.variables[lon_name][...].shape and - np.allclose(lon,dset.variables[lon_name][...])): - return lon_name - else: - lon_name += "_" - else: - dset.createDimension(lon_name,size=lon.size) - X = dset.createVariable(lon_name,"double",(lon_name)) - X.setncattr("units","degrees_east") - X.setncattr("axis","X") - X.setncattr("long_name","longitude") - X.setncattr("standard_name","longitude") - X[...] = lon - if self.lon_bnds is not None: - bnd_name = lon_name.replace("lon","lon_bnds") - X.setncattr("bounds",bnd_name) - if "nb" not in dset.dimensions.keys(): - D = dset.createDimension("nb",size=2) - if bnd_name not in dset.variables.keys(): - B = dset.createVariable(bnd_name,"double",(lon_name,"nb")) - B.setncattr("units","degrees_east") - B[...] = self.lon_bnds - return lon_name - - def _checkData(ndata,dset): - """A local function for ensuring the data dimension is saved in the dataset.""" - data_name = "data" - while True: - if data_name in dset.dimensions.keys(): - if (ndata == len(dset.dimensions[data_name])): - return data_name - else: - data_name += "_" - else: - dset.createDimension(data_name,size=ndata) - return data_name - - def _checkLayer(layer,dataset): - """A local function for ensuring the layer dimension is saved in the dataset.""" - layer_name = "layer" - while True: - if layer_name in dataset.dimensions.keys(): - if (layer.shape == dataset.variables[layer_name][...].shape and - np.allclose(layer,dataset.variables[layer_name][...])): - return layer_name - else: - layer_name += "_" - else: - dataset.createDimension(layer_name,size=layer.size) - Z = dataset.createVariable(layer_name,"double",(layer_name)) - Z.setncattr("units","m") - Z.setncattr("axis","Z") - Z.setncattr("long_name","depth") - Z.setncattr("standard_name","depth") - Z[...] = layer - if self.depth_bnds is not None: - bnd_name = layer_name.replace("layer","layer_bnds") - Z.setncattr("bounds",bnd_name) - if "nb" not in dataset.dimensions.keys(): - D = dataset.createDimension("nb",size=2) - if bnd_name not in dataset.variables.keys(): - B = dataset.createVariable(bnd_name,"double",(layer_name,"nb")) - B.setncattr("units","m") - B[...] = self.depth_bnds - return layer_name - - # if not group is desired, just write to the dataset... 
- if group is None: - dset = dataset - else: - # if a group is desired, check to see it exists and write into group - if not dataset.groups.has_key(group): - dset = dataset.createGroup(group) - else: - dset = dataset.groups[group] - - dim = [] - if self.temporal: dim.append(_checkTime (self.time ,dset)) - if self.layered: dim.append(_checkLayer(self.depth,dset)) - if self.ndata is not None: - dim.append(_checkData (self.ndata,dset)) - _checkLat(self.lat,dset) - _checkLon(self.lon,dset) - else: - if self.lat is not None: dim.append(_checkLat (self.lat ,dset)) - if self.lon is not None: dim.append(_checkLon (self.lon ,dset)) - - grp = dset - if self.data.size == 1: - if not dset.groups.has_key("scalars"): - grp = dset.createGroup("scalars") - else: - grp = dset.groups["scalars"] - - V = grp.createVariable(self.name,"double",dim,zlib=True) - V.setncattr("units",self.unit) - try: - V.setncattr("max",self.data.max()) - V.setncattr("min",self.data.min()) - except: - V.setncattr("max",0) - V.setncattr("min",1) - - if self.data.size == 1: - # we are dealing with a scalar - if np.ma.is_masked(self.data): self.data = 0 - else: - # not a scalar, find the middle 98 percent of the data - data = np.ma.copy(self.data).compressed().reshape((-1)) - if data.size == 0: - V.setncattr("up99",1) - V.setncattr("dn99",0) - else: - data.sort() - V.setncattr("up99",data[min(int(round(0.99*data.size)),data.size-1)]) - V.setncattr("dn99",data[ int(round(0.01*data.size))]) - - # optionally write out more attributes - if attributes: - for key in attributes.keys(): - V.setncattr(key,attributes[key]) - - if type(self.data) is np.ma.core.MaskedConstant: - V[...] = np.nan - else: - V[...] = self.data - - def plot(self,ax,**keywords): - """Plots the variable on the given matplotlib axis. - - The behavior of this routine depends on the type of variable - specified. If the data is purely temporal, then the plot will - be a scatter plot versus time of the data. If it is purely - spatial, then the plot will be a global plot of the data. The - routine supports multiple keywords although some may not apply - to the type of plot being generated. 
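For purely temporal data the routine essentially converts the internal time axis (days since 1850-01-01) to fractional years and draws a line plot. A stripped-down sketch of that branch with synthetic data (the spatial/Basemap branch is not shown):

    import numpy as np
    import matplotlib.pyplot as plt

    time = np.arange(0., 10 * 365., 30.4375)     # hypothetical times, days since 1850-01-01
    data = np.sin(2 * np.pi * time / 365.)
    fig, ax = plt.subplots()
    ax.plot(time / 365. + 1850, data, '-', color='k', lw=1.0, alpha=1.0)
    ax.grid(True)
    ax.set_xlabel("year")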
- - Parameters - ---------- - ax : matplotlib.axes._subplots.AxesSubplot - The matplotlib axes object onto which you wish to plot the variable - lw : float, optional - The line width to use when plotting - alpha : float, optional - The degree of transparency when plotting, alpha \in [0,1] - color : str or RGB tuple, optional - The color to plot with in line plots - label : str, optional - The label to appear in the legend of line plots - vmin : float, optional - The minimum plotted value - vmax : float, optional - The maximum plotted value - region : str, optional - The region on which to display a spatial variable - cmap : str, optional - The name of the colormap to be used in plotting the spatial variable - ticks : array of floats, optional - Defines the locations of xtick - ticklabels : array of strings, optional - Defines the labels of the xticks - """ - lw = keywords.get("lw" ,1.0) - alpha = keywords.get("alpha" ,1.0) - color = keywords.get("color" ,"k") - label = keywords.get("label" ,None) - vmin = keywords.get("vmin" ,self.data.min()) - vmax = keywords.get("vmax" ,self.data.max()) - region = keywords.get("region","global") - cmap = keywords.get("cmap" ,"jet") - land = keywords.get("land" ,0.875) - water = keywords.get("water" ,0.750) - pad = keywords.get("pad" ,5.0) - - rem_mask = None - r = Regions() - if self.temporal and not self.spatial: - - ticks = keywords.get("ticks",None) - ticklabels = keywords.get("ticklabels",None) - t = self.time/365.+1850 - ax.plot(t,self.data,'-', - color = color, - lw = lw, - alpha = alpha, - label = label) - if ticks is not None: ax.set_xticks(ticks) - if ticklabels is not None: ax.set_xticklabels(ticklabels) - ax.grid('on') - ax.set_ylim(vmin,vmax) - - elif not self.temporal: - - # Mask out areas outside our region - rem_mask = np.copy(self.data.mask) - self.data.mask += r.getMask(region,self) - - # Find the figure geometry - if self.ndata: - LAT = np.ma.masked_array(self.lat,mask=self.data.mask,copy=True) - LON = np.ma.masked_array(self.lon,mask=self.data.mask,copy=True) - dateline = False - else: - LAT,LON = np.meshgrid(self.lat,self.lon,indexing='ij') - LAT = np.ma.masked_array(LAT,mask=self.data.mask,copy=False) - LON = np.ma.masked_array(LON,mask=self.data.mask,copy=False) - LAT = self.lat[(LAT.mask==False).any(axis=1)] - TF = (LON.mask==False).any(axis=0) - # do we need to shift longitudes to plot continuously - # over the dateline? - dateline = True if (TF[0] == TF[-1] == True and - (TF==False).any() and - LAT.min() < -45. and - LAT.max() > 45. ) else False - LON = self.lon[TF] - if dateline: LON = (LON>=0)*LON+(LON<0)*(LON+360) - - lat0 = LAT.min() ; latf = LAT.max() - lon0 = LON.min() ; lonf = LON.max() - latm = LAT.mean(); lonm = LON.mean() - if dateline: - LON = (LON <=180)*LON +(LON >180)*(LON -360) - lon0 = (lon0<=180)*lon0+(lon0>180)*(lon0-360) - lonf = (lonf<=180)*lonf+(lonf>180)*(lonf-360) - lonm = (lonm<=180)*lonm+(lonm>180)*(lonm-360) - area = (latf-lat0) - if dateline: - area *= (360-lonf+lon0) - else: - area *= (lonf-lon0) - - # Setup the plot projection depending on data limits - bmap = Basemap(projection = 'robin', - lon_0 = lonm, - ax = ax, - resolution = 'c') - if (lon0 < -170.) and (lonf > 170.): - if lat0 > 23.5: - bmap = Basemap(projection = 'npstere', - boundinglat = lat0-5., - lon_0 = 0., - ax = ax, - resolution = 'c') - elif latf < -23.5: - bmap = Basemap(projection = 'spstere', - boundinglat = latf+5., - lon_0 = 180., - ax = ax, - resolution = 'c') - else: - if area < 10000. 
and not dateline: - bmap = Basemap(projection = 'cyl', - llcrnrlon = lon0-2*pad, - llcrnrlat = lat0- pad, - urcrnrlon = lonf+2*pad, - urcrnrlat = latf+ pad, - ax = ax, - resolution = 'c') - try: - bmap.drawlsmask(land_color = str(land), - ocean_color = str(water), - lakes = True) - except: - bmap.drawcoastlines(linewidth = 0.2, - color = "darkslategrey") - - if self.spatial: - LAT,LON = np.meshgrid(self.lat,self.lon,indexing='ij') - ax = bmap.pcolormesh(LON,LAT,self.data, - latlon=True,vmin=vmin,vmax=vmax,cmap=cmap) - elif self.ndata is not None: - x,y = bmap(self.lon[self.data.mask==False], - self.lat[self.data.mask==False]) - data = self.data[self.data.mask==False] - norm = colors.Normalize(vmin,vmax) - norm = norm(data) - clmp = get_cmap(cmap) - clrs = clmp(norm) - size = 35 - ax = bmap.scatter(x,y,s=size,color=clrs,ax=ax,linewidths=0,cmap=cmap) - if rem_mask is not None: self.data.mask = rem_mask - return ax - - - def interpolate(self,time=None,lat=None,lon=None,itype='nearestneighbor'): - """Use nearest-neighbor interpolation to interpolate time and/or space at given values. - - Parameters - ---------- - time : numpy.ndarray, optional - Array of times at which to interpolate the variable - lat : numpy.ndarray, optional - Array of latitudes at which to interpolate the variable - lon : numpy.ndarray, optional - Array of longitudes at which to interpolate the variable - - Returns - ------- - var : ILAMB.Variable.Variable - The interpolated variable - """ - if time is None and lat is None and lon is None: return self - output_time = self.time if (time is None) else time - output_tbnd = self.time_bnds if (time is None) else None - output_lat = self.lat if (lat is None) else lat - output_lon = self.lon if (lon is None) else lon - output_area = self.area if (lat is None and lon is None) else None - - data = self.data - if self.spatial and (lat is not None or lon is not None): - if lat is None: lat = self.lat - if lon is None: lon = self.lon - if itype == 'nearestneighbor': - rows = np.apply_along_axis(np.argmin,1,np.abs(lat[:,np.newaxis]-self.lat)) - cols = np.apply_along_axis(np.argmin,1,np.abs(lon[:,np.newaxis]-self.lon)) - args = [] - if self.temporal: args.append(range(self.time.size)) - if self.layered: args.append(range(self.depth.size)) - args.append(rows) - args.append(cols) - ind = np.ix_(*args) - mask = data.mask[ind] - data = data.data[ind] - data = np.ma.masked_array(data,mask=mask) - frac = self.area / il.CellAreas(self.lat,self.lon).clip(1e-12) - frac = frac.clip(0,1) - frac = frac[np.ix_(rows,cols)] - output_area = frac * il.CellAreas(lat,lon) - elif itype == 'bilinear': - from scipy.interpolate import RectBivariateSpline - if self.data.ndim == 3: - halo = il.LandLinInterMissingValues(self.data) - data = np.ma.zeros((self.data.shape[:-2]+(lat.size,lon.size))) - for i in range(self.data.shape[0]): - dint = RectBivariateSpline(self.lat,self.lon, halo[i,...], kx=1,ky=1) - mint = RectBivariateSpline(self.lat,self.lon,self.data[i,...].mask,kx=1,ky=1) - data[i,...] 
= np.ma.masked_array(dint(lat,lon,grid=True), - mint(lat,lon,grid=True)>0.5) - frac = self.area / il.CellAreas(self.lat,self.lon).clip(1e-12) - frac = frac.clip(0,1) - frac = RectBivariateSpline(self.lat,self.lon,frac,kx=1,ky=1) - output_area = frac(lat,lon,grid=True) * il.CellAreas(lat,lon) - else: - raise ValueError("Uknown interpolation type: %s" % itype) - if self.temporal and time is not None: - times = np.apply_along_axis(np.argmin,1,np.abs(time[:,np.newaxis]-self.time)) - mask = data.mask - if mask.size > 1: mask = data.mask[times,...] - data = data.data[times,...] - data = np.ma.masked_array(data,mask=mask) - output_tbnd = self.time_bnds[times] - return Variable(data = data, unit = self.unit, name = self.name, ndata = self.ndata, - lat = output_lat, - lon = output_lon, - area = output_area, - time = output_time, - time_bnds = output_tbnd) - - def phaseShift(self,var,method="max_of_annual_cycle"): - """Computes the phase shift between a variable and this variable. - - Finds the phase shift as the time between extrema of the - annual cycles of the variables. Note that if this var and/or - the given variable are not already annual cycles, they will be - computed but not returned. - - Parameters - ---------- - var : ILAMB.Variable.Variable - The variable with which we will measure phase shift - method : str, optional - The name of the method used to compute the phase shift - - """ - assert method in ["max_of_annual_cycle","min_of_annual_cycle"] - assert self.temporal == var.temporal - v1 = self; v2 = var - if not self.temporal: - # If the data is not temporal, then the user may have - # already found the extrema. If the units of the input - # variable are days, then set the extrema to this data. - if not (self.unit == "d" and var.unit == "d"): raise il.NotTemporalVariable - e1 = v1 - e2 = v2 - else: - # While temporal, the user may have passed in the mean - # annual cycle as the variable. So if the leading - # dimension is 12 we assume the variables are already the - # annual cycles. If not, we compute the cycles and then - # compute the extrema. - if self.time.size != 12: v1 = self.annualCycle() - if var.time.size != 12: v2 = var .annualCycle() - e1 = v1.timeOfExtrema(etype=method[:3]) - e2 = v2.timeOfExtrema(etype=method[:3]) - if e1.spatial: - shift = e1.spatialDifference(e2) - else: - data = e2.data - e1.data - mask = e1.data.mask + e2.data.mask - shift = Variable(data=data,unit=e1.unit,ndata=e1.ndata,lat=e1.lat,lon=e1.lon) - shift.name = "phase_shift_of_%s" % e1.name - shift.data += (shift.data < -0.5*365.)*365. - shift.data -= (shift.data > +0.5*365.)*365. - return shift - - def correlation(self,var,ctype,region=None): - """Computes the correlation between two variables. - - Parameters - ---------- - var : ILAMB.Variable.Variable - The variable with which we will compute a correlation - ctype : str - The correlation type, one of {"spatial","temporal","spatiotemporal"} - region : str, optional - The region over which to perform a spatial correlation - - Notes - ----- - Need to better think about what correlation means when data - are masked. The sums ignore the data but then the number of - items *n* is not constant and should be reduced for masked - values. 
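One way to address the caveat in the note, sketched here with numpy masked arrays and hypothetical data, is to let n be the per-slice count of unmasked pairs rather than the full axis length (the helper below currently uses the full length):

    import numpy as np

    x = np.ma.masked_invalid(np.array([[1., 2.], [2., np.nan], [3., 6.]]))
    y = np.ma.masked_invalid(np.array([[1., 1.], [2., 2.],     [3., np.nan]]))
    mask = x.mask | y.mask                      # keep only locations where both are valid
    x = np.ma.masked_array(x, mask=mask)
    y = np.ma.masked_array(y, mask=mask)
    n    = (~mask).sum(axis=0)                  # valid pairs per column, not the full length
    xbar = x.sum(axis=0) / n
    ybar = y.sum(axis=0) / n
    r = ((x * y).sum(axis=0) - n * xbar * ybar) / \
        (np.sqrt((x * x).sum(axis=0) - n * xbar ** 2) *
         np.sqrt((y * y).sum(axis=0) - n * ybar ** 2))
    # r -> [1.0, 1.0] for these values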
- - """ - def _correlation(x,y,axes=None): - if axes is None: axes = range(x.ndim) - if type(axes) == int: axes = (int(axes),) - axes = tuple(axes) - n = 1 - for ax in axes: n *= x.shape[ax] - xbar = x.sum(axis=axes)/n # because np.mean() doesn't take axes which are tuples - ybar = y.sum(axis=axes)/n - xy = (x*y).sum(axis=axes) - x2 = (x*x).sum(axis=axes) - y2 = (y*y).sum(axis=axes) - try: - r = (xy-n*xbar*ybar)/(np.sqrt(x2-n*xbar*xbar)*np.sqrt(y2-n*ybar*ybar)) - except: - r = np.nan - return r - - # checks on data consistency - assert region is None - assert self.data.shape == var.data.shape - assert ctype in ["spatial","temporal","spatiotemporal"] - - # determine arguments for functions - axes = None - out_time = None - out_lat = None - out_lon = None - out_area = None - out_ndata = None - if ctype == "temporal": - axes = 0 - if self.spatial: - out_lat = self.lat - out_lon = self.lon - out_area = self.area - elif self.ndata: - out_ndata = self.ndata - elif ctype == "spatial": - if self.spatial: axes = range(self.data.ndim)[-2:] - if self.ndata: axes = self.data.ndim-1 - if self.temporal: out_time = self.time - out_time_bnds = None - if out_time is not None: out_time_bnds = self.time_bnds - r = _correlation(self.data,var.data,axes=axes) - return Variable(data=r,unit="1", - name="%s_correlation_of_%s" % (ctype,self.name), - time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, - lat=out_lat,lon=out_lon,area=out_area) - - def bias(self,var): - """Computes the bias between a given variable and this variable. - - Parameters - ---------- - var : ILAMB.Variable.Variable - The variable with which we will measure bias - - Returns - ------- - bias : ILAMB.Variable.Variable - the bias - """ - # If not a temporal variable, then we assume that the user is - # passing in mean data and return the difference. - lat,lon,area = self.lat,self.lon,self.area - if not self.temporal: - assert self.temporal == var.temporal - bias = self.spatialDifference(var) - bias.name = "bias_of_%s" % self.name - return bias - if self.spatial: - # If the data is spatial, then we interpolate it on a - # common grid and take the difference. - - same_grid = False - try: - same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) - except: - pass - if not same_grid: - lat,lon = il.ComposeSpatialGrids(self,var) - area = None - self_int = self.interpolate(lat=lat,lon=lon) - var_int = var .interpolate(lat=lat,lon=lon) - data = var_int.data-self_int.data - mask = var_int.data.mask+self_int.data.mask - else: - data = var.data -self.data - mask = var.data.mask+self.data.mask - - elif (self.ndata or self.time.size == self.data.size): - # If the data are at sites, then take the difference - data = var.data.data-self.data.data - mask = var.data.mask+self.data.mask - else: - raise il.NotSpatialVariable("Cannot take bias of scalars") - # Finally we return the temporal mean of the difference - bias = Variable(data=np.ma.masked_array(data,mask=mask), - name="bias_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds, - unit=self.unit,ndata=self.ndata, - lat=lat,lon=lon,area=area, - depth_bnds = self.depth_bnds).integrateInTime(mean=True) - bias.name = bias.name.replace("_integrated_over_time_and_divided_by_time_period","") - return bias - - def rmse(self,var): - """Computes the RMSE between a given variable and this variable. 
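Schematically, the RMSE between two series is the square root of the time-mean of the squared difference; with equal-width intervals that reduces to the following sketch (hypothetical values, ignoring the interpolation and masking handled below):

    import numpy as np

    obs = np.array([1.0, 2.0, 3.0, 4.0])       # hypothetical reference series
    mod = np.array([1.5, 1.5, 3.5, 4.5])       # hypothetical model series
    rmse = np.sqrt(np.mean((mod - obs) ** 2))  # -> 0.5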
- - Parameters - ---------- - var : ILAMB.Variable.Variable - The variable with which we will measure RMSE - - Returns - ------- - RMSE : ILAMB.Variable.Variable - the RMSE - - """ - # If not a temporal variable, then we assume that the user is - # passing in mean data and return the difference. - lat,lon,area = self.lat,self.lon,self.area - if not self.temporal: - assert self.temporal == var.temporal - rmse = self.spatialDifference(var) - rmse.name = "rmse_of_%s" % self.name - return rmse - if self.spatial: - # If the data is spatial, then we interpolate it on a - # common grid and take the difference. - same_grid = False - try: - same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) - except: - pass - if not same_grid: - lat,lon = il.ComposeSpatialGrids(self,var) - area = None - self_int = self.interpolate(lat=lat,lon=lon) - var_int = var .interpolate(lat=lat,lon=lon) - data = var_int.data-self_int.data - mask = var_int.data.mask+self_int.data.mask - else: - data = var.data -self.data - mask = var.data.mask+self.data.mask - elif (self.ndata or self.time.size == self.data.size): - # If the data are at sites, then take the difference - data = var.data.data-self.data.data - mask = var.data.mask+self.data.mask - else: - raise il.NotSpatialVariable("Cannot take rmse of scalars") - # Finally we return the temporal mean of the difference squared - np.seterr(over='ignore',under='ignore') - data *= data - np.seterr(over='raise',under='raise') - rmse = Variable(data=np.ma.masked_array(data,mask=mask), - name="rmse_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds, - unit=self.unit,ndata=self.ndata, - lat=lat,lon=lon,area=area, - depth_bnds = self.depth_bnds).integrateInTime(mean=True) - rmse.name = rmse.name.replace("_integrated_over_time_and_divided_by_time_period","") - rmse.data = np.sqrt(rmse.data) - return rmse - - def rms(self): - """Computes the RMS of this variable. - - Returns - ------- - RMS : ILAMB.Variable.Variable - the RMS - - """ - if not self.temporal: raise il.NotTemporalVariable() - unit = self.unit - np.seterr(over='ignore',under='ignore') - data = self.data**2 - np.seterr(over='raise',under='raise') - rms = Variable(data = data, - unit = "1", # will change later - name = "tmp", # will change later - ndata = self.ndata, - lat = self.lat, - lon = self.lon, - area = self.area, - time = self.time).integrateInTime(mean=True) - np.seterr(over='ignore',under='ignore') - rms.data = np.sqrt(rms.data) - np.seterr(over='raise',under='raise') - rms.unit = unit - rms.name = "rms_of_%s" % self.name - return rms - - def interannualVariability(self): - """Computes the interannual variability. - - The internannual variability in this case is defined as the - standard deviation of the data in the temporal dimension. - - Returns - ------- - iav : ILAMB.Variable.Variable - the interannual variability variable - """ - if not self.temporal: raise il.NotTemporalVariable - np.seterr(over='ignore',under='ignore') - data = self.data.std(axis=0) - np.seterr(over='raise',under='raise') - return Variable(data=data, - name="iav_of_%s" % self.name, - unit=self.unit,ndata=self.ndata, - lat=self.lat,lon=self.lon,area=self.area, - depth_bnds = self.depth_bnds) - - def spatialDistribution(self,var,region="global"): - r"""Evaluates how well the input variable is spatially distributed relative to this variable. - - This routine returns the normalized standard deviation and - correlation (needed for a Taylor plot) as well as a score - given as - - .. 
math:: \frac{4(1+R)}{((\sigma+\frac{1}{\sigma})^2 (1+R_0))} - - where :math:`R` is the correlation, :math:`R_0=1` is the - reference correlation, and :math:`\sigma` is the normalized - standard deviation. - - Parameters - ---------- - var : ILAMB.Variable.Variable - the comparison variable - region : str, optional - the name of the region over which to check the spatial distribution - - Returns - ------- - std : ILAMB.Variable.Variable - the normalized standard deviation of the input variable - R : ILAMB.Variable.Variable - the correlation of the input variable - score : ILAMB.Variable.Variable - the spatial distribution score - - """ - assert self.temporal == var.temporal == False - - r = Regions() - - # First compute the observational spatial/site standard deviation - rem_mask0 = np.copy(self.data.mask) - self.data.mask += r.getMask(region,self) - - np.seterr(over='ignore',under='ignore') - std0 = self.data.std() - np.seterr(over='raise',under='raise') - - # Next compute the model spatial/site standard deviation - rem_mask = np.copy(var.data.mask) - var.data.mask += r.getMask(region,var) - - np.seterr(over='ignore',under='ignore') - std = var.data.std() - np.seterr(over='raise',under='raise') - - # Interpolate to new grid for correlation - if self.spatial: - lat,lon = il.ComposeSpatialGrids(self,var) - self_int = self.interpolate(lat=lat,lon=lon) - var_int = var .interpolate(lat=lat,lon=lon) - else: - self_int = self - var_int = var - R = self_int.correlation(var_int,ctype="spatial") # add regions - if type(R.data) is np.ma.core.MaskedConstant: R.data = 0. - - # Restore masks - self.data.mask = rem_mask0 - var.data.mask = rem_mask - - # Put together scores, we clip the standard deviation of both - # variables at the same small amount, meant to avoid division - # by zero errors. - try: - R0 = 1.0 - std0 = std0.clip(1e-12) - std = std .clip(1e-12) - std = std/std0 - score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) - except: - std = np.asarray([0.0]) - score = np.asarray([0.0]) - std = Variable(data=std ,name="normalized_spatial_std_of_%s_over_%s" % (self.name,region),unit="1") - score = Variable(data=score,name="spatial_distribution_score_of_%s_over_%s" % (self.name,region),unit="1") - return std,R,score - - def coarsenInTime(self,intervals,window=0.): - """Compute the mean function value in each of the input intervals. - - Parameters - ---------- - intervals : array of shape (n,2) - An array of n intervals where the first entry is the - beginning and the second entry is the end of the interval - window : float, optional - Extend each interval before and after by this amount of time - - Returns - ------- - coarse : ILAMB.Variable.Variable - The coarsened variable - """ - if not self.temporal: raise il.NotTemporalVariable - assert intervals.ndim == 2 - n = intervals.shape[0] - shp = (n,)+self.data.shape[1:] - time = np.zeros(n) - data = np.ma.zeros(shp) - for i in range(n): - t0 = intervals[i,0]-window - tf = intervals[i,1]+window - time[i] = 0.5*(t0+tf) - mean = self.integrateInTime(mean=True,t0=t0,tf=tf).convert(self.unit) - data[i,...] = mean.data - return Variable(name = "coarsened_%s" % self.name, - unit = self.unit, - time = time, - time_bnds = intervals, - data = data, - ndata = self.ndata, - lat = self.lat, - lon = self.lon, - area = self.area, - depth_bnds = self.depth_bnds) - - def accumulateInTime(self): - r"""For each time interval, accumulate variable from the beginning. 
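A schematic of the accumulation with a rate that is constant over each interval: the running value at the end of interval i is the previous total plus the integral over that interval (illustrative values only).

    import numpy as np

    time_bnds = np.array([[0., 31.], [31., 59.], [59., 90.]])  # interval bounds [days]
    rate      = np.array([1.0, 2.0, 0.5])                      # mean rate on each interval

    accum = np.zeros(len(rate) + 1)
    for i, (t0, tf) in enumerate(time_bnds):
        accum[i + 1] = accum[i] + rate[i] * (tf - t0)          # running total of interval integrals
    # accum -> [0., 31., 87., 102.5]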
- - For each time interval :math:`i` in the variable, defined by - :math:`[t_0^i,t_f^i]`, compute - - .. math:: \int_{t_0^0}^{t_f^i} v(t,\dots)\ dt - - This routine is useful, for example, if the variable is a mass - rate defined over time and we wish to know the mass - accumulation as a function of time. - - Returns - ------- - sum : ILAMB.Variable.Variable - The cumulative sum of this variable - - """ - if not self.temporal: raise il.NotTemporalVariable - n = self.time.size - shp = (n+1,) + self.data.shape[1:] - time = np.zeros(n+1) - data = np.ma.zeros(shp) - time[0] = self.time_bnds[0,0] - for i in range(n): - t0 = self.time_bnds[i,0] - tf = self.time_bnds[i,1] - isum = self.integrateInTime(t0=t0,tf=tf) - time[i+1] = tf - data[i+1,...] = data[i,...] + isum.data - - return Variable(name = "cumulative_sum_%s" % self.name, - unit = isum.unit, - time = time, - data = data, - lat = self.lat, - lon = self.lon, - area = self.area) - - - def trim(self,lat=None,lon=None,t=None,d=None): - """Trim away a variable in space/time in place. - - Parameters - ---------- - lat,lon,t,d : tuple or list - a 2-tuple containing the lower and upper limits beyond which we trim - """ - def _whichInterval(val,bnds): - ind = np.where((val>=bnds[:,0])*(val<=bnds[:,1]))[0] - assert ind.size <= 2 - ind = ind[0] - return ind - - if lat is not None: - assert len(lat) == 2 - if not self.spatial: raise il.NotSpatialVariable - i = _whichInterval(lat[0],self.lat_bnds) - j = _whichInterval(lat[1],self.lat_bnds)+1 - self.lat = self.lat [i:j] - self.lat_bnds = self.lat_bnds[i:j] - self.data = self.data[...,i:j,:] - self.area = self.area[ i:j,:] - if lon is not None: - assert len(lon) == 2 - if not self.spatial: raise il.NotSpatialVariable - i = _whichInterval(lon[0],self.lon_bnds) - j = _whichInterval(lon[1],self.lon_bnds)+1 - self.lon = self.lon [i:j] - self.lon_bnds = self.lon_bnds[i:j] - self.data = self.data[...,i:j] - self.area = self.area[ :,i:j] - if t is not None: - assert len(t) == 2 - if not self.temporal: raise il.NotTemporalVariable - self = il.ClipTime(self,t[0],t[1]) - if d is not None: - assert len(d) == 2 - if self.depth_bnds is None: raise ValueError - keep = (self.depth_bnds[:,1] >= d[0])*(self.depth_bnds[:,0] <= d[1]) - ind = np.where(keep)[0] - self.depth_bnds = self.depth_bnds[ind,:] - self.depth = self.depth [ind ] - self.data = self.data[...,ind,:,:] - - return self diff --git a/ilamb/ilamb/src/ILAMB/__init__.py b/ilamb/ilamb/src/ILAMB/__init__.py deleted file mode 100644 index 3bb5124a..00000000 --- a/ilamb/ilamb/src/ILAMB/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -__author__ = 'Nathan Collier' -__date__ = 'Jun 2018' -__version__ = '2.3' - -from distutils.version import LooseVersion -import platform - -# These are guesses at actual requirements -requires = { - "numpy" : "1.9.2", - "matplotlib" : "1.4.3", - "netCDF4" : "1.1.4", - "cf_units" : "2.0.0", - "mpl_toolkits.basemap" : "1.0.7", - "sympy" : "0.7.6", - "mpi4py" : "1.3.1" -} - -froms = { - "mpl_toolkits.basemap" : "Basemap" -} - -for key in requires.keys(): - if "." 
in key: - pkg = __import__(key, globals(), locals(), [froms[key]]) - else: - pkg = __import__(key) - if LooseVersion(pkg.__version__) < LooseVersion(requires[key]): - raise ImportError( - "Bad %s version: ILAMB %s requires %s >= %s got %s" % - (key,__version__,key,requires[key],pkg.__version__)) - - - - diff --git a/ilamb/ilamb/src/ILAMB/constants.py b/ilamb/ilamb/src/ILAMB/constants.py deleted file mode 100644 index 8d77b8f6..00000000 --- a/ilamb/ilamb/src/ILAMB/constants.py +++ /dev/null @@ -1,224 +0,0 @@ -from numpy import asarray,ones,copy as npcopy -from matplotlib.colors import from_levels_and_colors -from Regions import Regions - -__all__ = ['spm','mph','hpd','mpy','dpy_noleap','dpy_gregorian','dpy_360','dpm_noleap','dpm_gregorian','dpm_360','g_per_Pg','g_per_kg','Ar_molar_mass','C_molar_mass','N_molar_mass','O_molar_mass','CO2_molar_mass','dry_air_molar_mass','dry_air_mass','dry_air_moles','co2_g_per_ppm','co2_ppm_per_kg','co2_ppm_per_C_Pg','regions','NCARclrs','NCARcmap','NCARnorm','region_names','dpy','mid_months','spd','spy'] - -# Time constants -spm = 60. # seconds per minute -mph = 60. # minutes per hour -hpd = 24. # hours per day -spd = spm*mph*hpd -spy = spd*365. -mpy = 12. # months per year -dpy_noleap = 365.0 # days per year (for no leap year calendars) -dpy_gregorian = 365.25 # days per year -dpy_360 = 360.0 # days per year (for 30 days/month) -dpm_noleap = asarray([31,28,31,30,31,30,31,31,30,31,30,31],dtype='float') # days per month -dpm_gregorian = npcopy(dpm_noleap) ; dpm_gregorian[1] = dpm_gregorian[1] + 0.25 -dpm_360 = ones(int(mpy))*30. -mid_months = asarray([15.5,45.,74.5,105.,135.5,166.,196.5,227.5,258.,288.5,319.,349.5],dtype='float') -lbl_months = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"] -bnd_months = asarray([0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365],dtype='int') - -dpy = {"noleap" : dpy_noleap, - "365_day" : dpy_noleap, - "360_day" : dpy_360, - "gregorian" : dpy_gregorian, - "proleptic_gregorian" : dpy_gregorian} - -# Mass unit conversions -g_per_Pg = 1e+15 # grams per Pg -g_per_kg = 1e+3 # grams per kg - -# Chemical constants -Ar_molar_mass = 39.948 # grams per mole -C_molar_mass = 12.0107 # grams per mole -N_molar_mass = 14.0067 # grams per mole -O_molar_mass = 15.9994 # grams per mole -CO2_molar_mass = C_molar_mass + 2. * O_molar_mass # grams per mole - -# Atmospheric constants -dry_air_molar_mass = 0.78084*2.*N_molar_mass + 0.20946*2.*O_molar_mass + 0.00934*Ar_molar_mass + 0.00039445*CO2_molar_mass # grams per mole -dry_air_mass = 5.1352e+21 # grams -dry_air_moles = dry_air_mass / dry_air_molar_mass -co2_g_per_ppm = dry_air_moles * CO2_molar_mass / 1.e+6 -co2_ppm_per_kg = g_per_kg / co2_g_per_ppm -co2_ppm_per_C_Pg = g_per_Pg / co2_g_per_ppm * CO2_molar_mass/C_molar_mass - -# Earth constants -earth_rad = 6.371e6 # meters - - -NCARclrs = asarray([[93,0,135], - [196,0,43], - [255,35,0], - [255,140,0], - [255,207,0], - [248,255,0], - [97,210,0], - [0,197,56], - [0,242,211], - [0,144,255], - [0,0,255]],dtype=float)/255. 
- -# Spatial plots and their default options -space_opts = {} -space_opts["timeint"] = { "name" :"Temporally integrated period mean", - "cmap" :"choose", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit", - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_timeint.png", - "sidelbl" :"MODEL MEAN", - "haslegend" :True } - -space_opts["timeintremap"] = { "name" :"Temporally integrated remapped period mean", - "cmap" :"choose", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit", - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_timeintremap.png", - "sidelbl" :"MAPPED MODEL MEAN", - "haslegend" :True } - -space_opts["bias"] = { "name" :"Temporally integrated period mean bias", - "cmap" :"seismic", - "sym" :True, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_bias.png", - "sidelbl" :"BIAS", - "haslegend" :True } - -space_opts["biasscore"] = { "name" :"Temporally integrated period mean bias score", - "cmap" :"RdYlGn", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_biasscore.png", - "sidelbl" :"BIAS SCORE", - "haslegend" :True } - -space_opts["rmse"] = { "name" :"Temporally integrated period mean rmse", - "cmap" :"YlOrRd", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_rmse.png", - "sidelbl" :"RMSE", - "haslegend" :True } - -space_opts["rmsescore"] = { "name" :"Temporally integrated period mean rmse score", - "cmap" :"RdYlGn", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_rmsescore.png", - "sidelbl" :"RMSE SCORE", - "haslegend" :True } - -space_opts["iav"] = { "name" :"Interannual variability", - "cmap" :"Reds", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_iav.png", - "sidelbl" :"MODEL INTERANNUAL VARIABILITY", - "haslegend" :True } - -space_opts["iavscore"] = { "name" :"Interannual variability score", - "cmap" :"RdYlGn", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_iavscore.png", - "sidelbl" :"INTERANNUAL VARIABILITY SCORE", - "haslegend" :True } - -space_opts["shift"] = { "name" :"Temporally integrated mean phase shift", - "cmap" :"PRGn", - "sym" :True, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_shift.png", - "sidelbl" :"DIFFERENCE IN MAX MONTH", - "haslegend" :True } - -space_opts["shiftscore"] = { "name" :"Temporally integrated mean phase shift score", - "cmap" :"RdYlGn", - "sym" :False, - "ticks" :None, - "ticklabels":None, - "label" :"unit" , - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_shiftscore.png", - "sidelbl" :"SEASONAL CYCLE SCORE", - "haslegend" :True } - -space_opts["phase"] = { "name" :"Temporally integrated period mean max month", - "cmap" :"jet", - "sym" :False, - "ticks" :mid_months, - "ticklabels":lbl_months, - "label" :"month", - "section" :"Temporally integrated period mean", - "pattern" :"MNAME_RNAME_phase.png", - "sidelbl" :"MODEL MAX MONTH", - "haslegend" :True } - - 
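These option tables are keyed by plot type; downstream code can look up a plot's colormap and build its output filename by substituting the model and region names into the pattern, roughly as in this sketch (hypothetical model and region names, using the space_opts dictionary defined above):

    opts  = space_opts["bias"]
    mname = "MODELX"   # hypothetical model name
    rname = "global"   # hypothetical region name
    figname = opts["pattern"].replace("MNAME", mname).replace("RNAME", rname)
    # figname -> "MODELX_global_bias.png"; the map would use cmap = opts["cmap"] ("seismic")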
-time_opts = {} -time_opts["spaceint"] = { "name" : "Spatially integrated regional mean", - "section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_spaceint.png", - "sidelbl" : "REGIONAL MEAN", - "ticks" : None, - "ticklabels" : None, - "ylabel" : "unit"} - -time_opts["accumulate"] = { "name" : "Accumulated mean", - "section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_accumulate.png", - "sidelbl" : "ACCUMULATION", - "ticks" : None, - "ticklabels" : None, - "ylabel" : "unit"} - -time_opts["cycle"] = { "name" : "Spatially integrated regional mean cycle", - "section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_cycle.png", - "sidelbl" : "ANNUAL CYCLE", - "ticks" : mid_months/365.+1850., - "ticklabels" : lbl_months, - "ylabel" : "unit"} - -time_opts["dtcycle"] = { "name" : "Spatially integrated regional mean detrended cycle", - "section" : "Spatially integrated regional mean", - "haslegend" : False, - "pattern" : "MNAME_RNAME_dtcycle.png", - "sidelbl" : "MONTHLY ANOMALY", - "ticks" : mid_months/365.+1850., - "ticklabels" : lbl_months, - "ylabel" : "unit"} diff --git a/ilamb/ilamb/src/ILAMB/ilamblib.py b/ilamb/ilamb/src/ILAMB/ilamblib.py deleted file mode 100644 index 5f09ed02..00000000 --- a/ilamb/ilamb/src/ILAMB/ilamblib.py +++ /dev/null @@ -1,1821 +0,0 @@ -from scipy.interpolate import NearestNDInterpolator -from constants import dpy,mid_months,bnd_months -from Regions import Regions -from netCDF4 import Dataset,num2date,date2num -from datetime import datetime -from cf_units import Unit -from copy import deepcopy -from mpi4py import MPI -import numpy as np -import logging,re - -logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) - -class VarNotInFile(Exception): - def __str__(self): return "VarNotInFile" - -class VarNotMonthly(Exception): - def __str__(self): return "VarNotMonthly" - -class VarNotInModel(Exception): - def __str__(self): return "VarNotInModel" - -class VarsNotComparable(Exception): - def __str__(self): return "VarNotComparable" - -class VarNotOnTimeScale(Exception): - def __str__(self): return "VarNotOnTimeScale" - -class UnknownUnit(Exception): - def __str__(self): return "UnknownUnit" - -class AreasNotInModel(Exception): - def __str__(self): return "AreasNotInModel" - -class MisplacedData(Exception): - def __str__(self): return "MisplacedData" - -class NotTemporalVariable(Exception): - def __str__(self): return "NotTemporalVariable" - -class NotSpatialVariable(Exception): - def __str__(self): return "NotSpatialVariable" - -class UnitConversionError(Exception): - def __str__(self): return "UnitConversionError" - -class AnalysisError(Exception): - def __str__(self): return "AnalysisError" - -class NotLayeredVariable(Exception): - def __str__(self): return "NotLayeredVariable" - -class NotDatasiteVariable(Exception): - def __str__(self): return "NotDatasiteVariable" - -def FixDumbUnits(unit): - r"""Try to fix the dumb units people insist on using. 
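Given the synonym handling and carbon-suffix stripping in the body below, calls like the following would be expected to behave as indicated (illustrative expected values, assuming cf_units recognizes "kg" as a mass unit):

    FixDumbUnits("unitless")     # expected -> "1"
    FixDumbUnits("kgC m-2 s-1")  # expected -> "kg m-2 s-1" (trailing C stripped)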
- - Parameters - ---------- - unit : str - the trial unit - - Returns - ------- - unit : str - the fixed unit - """ - # Various synonyms for 1 - if unit.lower().strip() in ["unitless", - "n/a", - "none"]: unit = "1" - # Remove the C which so often is used to mean carbon but actually means coulomb - tokens = re.findall(r"[\w']+", unit) - for token in tokens: - if token.endswith("C") and Unit(token[:-1]).is_convertible(Unit("g")): - unit = unit.replace(token,token[:-1]) - return unit - -def GenerateDistinctColors(N,saturation=0.67,value=0.67): - r"""Generates a series of distinct colors. - - Computes N distinct colors using HSV color space, holding the - saturation and value levels constant and linearly vary the - hue. Colors are returned as a RGB tuple. - - Parameters - ---------- - N : int - number of distinct colors to generate - saturation : float, optional - argument of HSV color space - value : float, optional - argument of HSV color space - - Returns - ------- - RGB_tuples : list - list of N distinct RGB tuples - """ - from colorsys import hsv_to_rgb - HSV_tuples = [(x/float(N), saturation, value) for x in range(N)] - RGB_tuples = map(lambda x: hsv_to_rgb(*x), HSV_tuples) - return RGB_tuples - -def ConvertCalendar(t,tbnd=None): - r"""Converts calendar representations to a single standard. - - This routine converts the representation of time to the ILAMB - default: days since 1850-1-1 00:00:00 on a 365-day calendar. This - is so we can make comparisons with data from other models and - benchmarks. - - Parameters - ---------- - t : netCDF4 variable - the netCDF4 variable which represents time - tbnd : netCDF4 variable, optional - the netCDF4 variable which represents the bounds of time - - Returns - ------- - ta : numpy.ndarray - a numpy array of the converted times - tabnd : numpy.ndarray, optional - a numpy array of the converted boundary times - - """ - # If not calendar is given, we will assume it is 365_day - unit = t.units - if "calendar" in t.ncattrs(): - calendar = t.calendar.lower() - else: - calendar = "365_day" - - # If bounds are given, we will use those instead and later compute - # the time as the midpoint of the bounds. - if tbnd is None: - ta = t - else: - ta = tbnd - - # The datum might be different, use netCDF functions to shift it - ta = num2date(ta[...],unit ,calendar=calendar) - ta = date2num(ta ,"days since 1850-1-1",calendar=calendar) - - # Differences in calendars need to be handled differently - # depending on the intended temporal resolution. Here we introduce - # special code for different cases. - if tbnd is None: - if t[...].size == 1: - dt = 0 - else: - dt = (ta[1:]-ta[:-1]).mean() - else: - dt = (ta[:,1]-ta[:,0]).mean() - if np.allclose(dt,30,atol=3): # monthly - - tmid = np.copy(ta) - if tmid.ndim > 1: tmid = ta.mean(axis=1) - - # Determine the month index by finding to which mid_month day - # the middle time point is closest. - def _dpyShift(tmid,ta,dpy): - yrs = np.floor((tmid / float(dpy)))*365. 
- ind = np.abs((tmid % float(dpy))[:,np.newaxis]-mid_months).argmin(axis=1) - if ta.ndim == 1: - ta = yrs + mid_months[ind] - if ta.ndim == 2: - ta[:,0] = yrs + bnd_months[ind] - ta[:,1] = yrs + bnd_months[ind+1] - return ta - if calendar == "360_day": - ta = _dpyShift(tmid,ta,360) - elif calendar == "366_day": - ta = _dpyShift(tmid,ta,366) - elif calendar in ["365_day","noleap"]: - ta = _dpyShift(tmid,ta,365) - elif calendar in ["proleptic_gregorian","gregorian","standard","julian"]: - # we can use datetime to get the Julian day and then find - # how these line up with mid_months - tmid = num2date(tmid,"days since 1850-1-1",calendar=calendar) - yrs = [float(t.year-1850)*365. for t in tmid] - tmid = [float(t.timetuple().tm_yday) for t in tmid] - tmid = np.asarray(tmid) - ind = np.abs(tmid[:,np.newaxis]-mid_months).argmin(axis=1) - if ta.ndim == 1: - ta = yrs + mid_months[ind] - if ta.ndim == 2: - ta[:,0] = yrs + bnd_months[ind] - ta[:,1] = yrs + bnd_months[ind+1] - else: - raise ValueError("Unsupported calendar: %s" % calendar) - - if tbnd is None: return ta - t = ta.mean(axis=1) - return t,ta - -def CellAreas(lat,lon): - """Given arrays of latitude and longitude, return cell areas in square meters. - - Parameters - ---------- - lat : numpy.ndarray - a 1D array of latitudes which represent cell centroids - lon : numpy.ndarray - a 1D array of longitudes which represent cell centroids - - Returns - ------- - areas : numpy.ndarray - a 2D array of cell areas in [m2] - """ - from constants import earth_rad - - x = np.zeros(lon.size+1) - x[1:-1] = 0.5*(lon[1:]+lon[:-1]) - x[ 0] = lon[ 0]-0.5*(lon[ 1]-lon[ 0]) - x[-1] = lon[-1]+0.5*(lon[-1]-lon[-2]) - if(x.max() > 181): x -= 180 - x = x.clip(-180,180) - x *= np.pi/180. - - y = np.zeros(lat.size+1) - y[1:-1] = 0.5*(lat[1:]+lat[:-1]) - y[ 0] = lat[ 0]-0.5*(lat[ 1]-lat[ 0]) - y[-1] = lat[-1]+0.5*(lat[-1]-lat[-2]) - y = y.clip(-90,90) - y *= np.pi/180. - - dx = earth_rad*(x[1:]-x[:-1]) - dy = earth_rad*(np.sin(y[1:])-np.sin(y[:-1])) - areas = np.outer(dx,dy).T - - return areas - -def GlobalLatLonGrid(res,**keywords): - r"""Generates a latitude/longitude grid at a desired resolution - - Computes 1D arrays of latitude and longitude values which - correspond to cell interfaces and centroids at a given resolution. 
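The interface/centroid construction is a uniform subdivision of the globe with midpoints taken between interfaces; a 2-degree sketch of that idea:

    import numpy as np

    res = 2.0                                               # hypothetical resolution [degrees]
    lon_bnd = np.linspace(-180., 180., int(360. / res) + 1)
    lat_bnd = np.linspace( -90.,  90., int(180. / res) + 1)
    lon = 0.5 * (lon_bnd[1:] + lon_bnd[:-1])                # cell centroids
    lat = 0.5 * (lat_bnd[1:] + lat_bnd[:-1])
    # lat.size, lon.size -> 90, 180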
- - Parameters - ---------- - res : float - the desired resolution of the grid in degrees - from_zero : boolean - sets longitude convention { True:(0,360), False:(-180,180) } - - Returns - ------- - lat_bnd : numpy.ndarray - a 1D array of latitudes which represent cell interfaces - lon_bnd : numpy.ndarray - a 1D array of longitudes which represent cell interfaces - lat : numpy.ndarray - a 1D array of latitudes which represent cell centroids - lon : numpy.ndarray - a 1D array of longitudes which represent cell centroids - """ - from_zero = keywords.get("from_zero",False) - res_lat = keywords.get("res_lat",res) - res_lon = keywords.get("res_lon",res) - nlon = int(360./res_lon)+1 - nlat = int(180./res_lat)+1 - lon_bnd = np.linspace(-180,180,nlon) - if from_zero: lon_bnd += 180 - lat_bnd = np.linspace(-90,90,nlat) - lat = 0.5*(lat_bnd[1:]+lat_bnd[:-1]) - lon = 0.5*(lon_bnd[1:]+lon_bnd[:-1]) - return lat_bnd,lon_bnd,lat,lon - -def NearestNeighborInterpolation(lat1,lon1,data1,lat2,lon2): - r"""Interpolates globally grided data at another resolution - - Parameters - ---------- - lat1 : numpy.ndarray - a 1D array of latitudes of cell centroids corresponding to the - source data - lon1 : numpy.ndarray - a 1D array of longitudes of cell centroids corresponding to the - source data - data1 : numpy.ndarray - an array of data to be interpolated of shape = (lat1.size,lon1.size,...) - lat2 : numpy.ndarray - a 1D array of latitudes of cell centroids corresponding to the - target resolution - lon2 : numpy.ndarray - a 1D array of longitudes of cell centroids corresponding to the - target resolution - - Returns - ------- - data2 : numpy.ndarray - an array of interpolated data of shape = (lat2.size,lon2.size,...) - """ - rows = np.apply_along_axis(np.argmin,1,np.abs(lat2[:,np.newaxis]-lat1)) - cols = np.apply_along_axis(np.argmin,1,np.abs(lon2[:,np.newaxis]-lon1)) - data2 = data1[np.ix_(rows,cols)] - return data2 - -def TrueError(lat1_bnd,lon1_bnd,lat1,lon1,data1,lat2_bnd,lon2_bnd,lat2,lon2,data2): - r"""Computes the pointwise difference between two sets of gridded data - - To obtain the pointwise error we populate a list of common cell - interfaces and then interpolate both input arrays to the composite - grid resolution using nearest-neighbor interpolation. - - Parameters - ---------- - lat1_bnd, lon1_bnd, lat1, lon1 : numpy.ndarray - 1D arrays corresponding to the latitude/longitudes of the cell - interfaces/centroids - data1 : numpy.ndarray - an array of data to be interpolated of shape = (lat1.size,lon1.size,...) - lat2_bnd, lon2_bnd, lat2, lon2 : numpy.ndarray - 1D arrays corresponding to the latitude/longitudes of the cell - interfaces/centroids - data2 : numpy.ndarray - an array of data to be interpolated of shape = (lat2.size,lon2.size,...) - - Returns - ------- - lat_bnd, lon_bnd, lat, lon : numpy.ndarray - 1D arrays corresponding to the latitude/longitudes of the cell - interfaces/centroids of the resulting error - error : numpy array - an array of the pointwise error of shape = (lat.size,lon.size,...) 
- """ - # combine limits, sort and remove duplicates - lat_bnd = np.hstack((lat1_bnd,lat2_bnd)); lat_bnd.sort(); lat_bnd = np.unique(lat_bnd) - lon_bnd = np.hstack((lon1_bnd,lon2_bnd)); lon_bnd.sort(); lon_bnd = np.unique(lon_bnd) - - # need centroids of new grid for nearest-neighbor interpolation - lat = 0.5*(lat_bnd[1:]+lat_bnd[:-1]) - lon = 0.5*(lon_bnd[1:]+lon_bnd[:-1]) - - # interpolate datasets at new grid - d1 = NearestNeighborInterpolation(lat1,lon1,data1,lat,lon) - d2 = NearestNeighborInterpolation(lat2,lon2,data2,lat,lon) - - # relative to the first grid/data - error = d2-d1 - return lat_bnd,lon_bnd,lat,lon,error - -def SympifyWithArgsUnits(expression,args,units): - """Uses symbolic algebra to determine the final unit of an expression. - - Parameters - ---------- - expression : str - the expression whose units you wish to simplify - args : dict - a dictionary of numpy arrays whose keys are the - variables written in the input expression - units : dict - a dictionary of strings representing units whose keys are the - variables written in the input expression - - """ - from sympy import sympify,postorder_traversal - - expression = sympify(expression) - - # try to convert all arguments to same units if possible, it - # catches most use cases - keys = args.keys() - for i,key0 in enumerate(keys): - for key in keys[(i+1):]: - try: - Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True) - units[key] = units[key0] - except: - pass - - for expr in postorder_traversal(expression): - ekey = str(expr) - if expr.is_Add: - - # if there are scalars in the expression, these will not - # be in the units dictionary. Add them and give them an - # implicit unit of 1 - keys = [str(arg) for arg in expr.args] - for key in keys: - if not units.has_key(key): units[key] = "1" - - # if we are adding, all arguments must have the same unit. - key0 = keys[0] - for key in keys: - Unit(units[key]).convert(np.ones(1),Unit(units[key0])) - units[key] = units[key0] - units[ekey] = "%s" % (units[key0]) - - elif expr.is_Pow: - - # if raising to a power, just create the new unit - keys = [str(arg) for arg in expr.args] - units[ekey] = "(%s)%s" % (units[keys[0]],keys[1]) - - elif expr.is_Mul: - - # just create the new unit - keys = [str(arg) for arg in expr.args] - units[ekey] = " ".join(["(%s)" % units[key] for key in keys if units.has_key(key)]) - return sympify(str(expression),locals=args),units[ekey] - - -def ComputeIndexingArrays(lat2d,lon2d,lat,lon): - """Blah. - - Parameters - ---------- - lat : numpy.ndarray - A 1D array of latitudes of cell centroids - lon : numpy.ndarray - A 1D array of longitudes of cell centroids - - """ - # Prepare the interpolator - points = np.asarray([lat2d.flatten(),lon2d.flatten()]).T - values = np.asarray([(np.arange(lat2d.shape[0])[:,np.newaxis]*np.ones (lat2d.shape[1])).flatten(), - (np.ones (lat2d.shape[0])[:,np.newaxis]*np.arange(lat2d.shape[1])).flatten()]).T - fcn = NearestNDInterpolator(points,values) - LAT,LON = np.meshgrid(lat,lon,indexing='ij') - gmap = fcn(LAT.flatten(),LON.flatten()).astype(int) - return gmap[:,0].reshape(LAT.shape),gmap[:,1].reshape(LAT.shape) - -def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=None): - """Extracts data from a netCDF4 datafile for use in a Variable object. - - Intended to be used inside of the Variable constructor. Some of - the return arguments will be None depending on the contents of the - netCDF4 file. 
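A typical call (the file and variable names here are hypothetical) unpacks the full tuple that the routine returns; any field not present in the file comes back as None:

    # hypothetical file and variable names, for illustration only
    (data, unit, name, t, t_bnd, lat, lat_bnd, lon, lon_bnd,
     depth, depth_bnd, cbounds, ndata) = FromNetCDF4("gpp_0.5x0.5.nc", "gpp",
                                                     alternate_vars=["GPP"])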
- - Parameters - ---------- - filename : str - Name of the netCDF4 file from which to extract a variable - variable_name : str - Name of the variable to extract from the netCDF4 file - alternate_vars : list of str, optional - A list of possible alternate variable names to find - t0 : float, optional - If temporal, specifying the initial time can reduce memory - usage and speed up access time. - tf : float, optional - If temporal, specifying the final time can reduce memory - usage and speed up access time. - - Returns - ------- - data : numpy.ma.masked_array - The array which contains the data which constitutes the variable - unit : str - The unit of the input data - name : str - The name of the variable, will be how it is saved in an output netCDF4 file - time : numpy.ndarray - A 1D array of times in days since 1850-01-01 00:00:00 - time_bnds : numpy.ndarray - A 1D array of time bounds in days since 1850-01-01 00:00:00 - lat : numpy.ndarray - A 1D array of latitudes of cell centroids - lon : numpy.ndarray - A 1D array of longitudes of cell centroids - area : numpy.ndarray - A 2D array of the cell areas - ndata : int - Number of data sites this data represents - depth_bnds : numpy.ndarray - A 1D array of the depth boundaries of each cell - """ - try: - dset = Dataset(filename,mode="r") - if group is None: - grp = dset - else: - grp = dset.groups[group] - except RuntimeError: - raise RuntimeError("Unable to open the file: %s" % filename) - - found = False - if variable_name in grp.variables.keys(): - found = True - var = grp.variables[variable_name] - else: - while alternate_vars.count(None) > 0: alternate_vars.pop(alternate_vars.index(None)) - for var_name in alternate_vars: - if var_name in grp.variables.keys(): - found = True - var = grp.variables[var_name] - if found == False: - alternate_vars.insert(0,variable_name) - raise RuntimeError("Unable to find [%s] in the file: %s" % (",".join(alternate_vars),filename)) - - # Check on dimensions - time_name = [name for name in var.dimensions if "time" in name.lower()] - lat_name = [name for name in var.dimensions if "lat" in name.lower()] - lon_name = [name for name in var.dimensions if "lon" in name.lower()] - data_name = [name for name in var.dimensions if "data" in name.lower()] - missed = [name for name in var.dimensions if name not in (time_name + - lat_name + - lon_name + - data_name)] - - # Lat/lon might be indexing arrays, find their shape - shp = None - if (len(lat_name) == 0 and len(lon_name) == 0 and len(missed) >= 2 and len(data_name) == 0): - # remove these dimensions from the missed variables - i,j = var.dimensions[-2],var.dimensions[-1] - if i in missed: missed.pop(missed.index(i)) - if j in missed: missed.pop(missed.index(j)) - i = grp.variables[i] - j = grp.variables[j] - if (np.issubdtype(i.dtype,np.integer) and - np.issubdtype(j.dtype,np.integer)): shp = [len(i),len(j)] - - # Lat/lon might just be sizes - if (len(lat_name) == 1 and len(lon_name) == 1): - if not (lat_name[0] in grp.variables and lon_name[0] in grp.variables): - shp = [len(grp.dimensions[lat_name[0]]),len(grp.dimensions[lon_name[0]])] - - # If these were sizes, then we need to find the correct 2D lat/lon arrays - if shp is not None: - - # We want to remove any false positives we might find. I don't - # want to consider variables which are 'bounds' or dimensions - # of others, nor those that don't have the correct shape. 
- bnds = [grp.variables[v].bounds for v in grp.variables if "bounds" in grp.variables[v].ncattrs()] - dims = [v for v in grp.variables if (v in grp.dimensions)] - poss = [v for v in grp.variables if (v not in dims and - v not in bnds and - np.allclose(shp,grp.variables[v].shape) if len(shp) == len(grp.variables[v].shape) else False)] - lat_name = [name for name in poss if "lat" in name.lower()] - lon_name = [name for name in poss if "lon" in name.lower()] - - # If still ambiguous, look inside the variable attributes for - # the presence of the variable name to give further - # preference. - attrs = [str(var.getncattr(attr)) for attr in var.ncattrs()] - if len(lat_name) == 0: raise ValueError("Unable to find values for the latitude dimension in %s" % (filename)) - if len(lat_name) > 1: - tmp_name = [name for name in lat_name if np.any([name in attr for attr in attrs])] - if len(tmp_name) > 0: lat_name = tmp_name - if len(lon_name) == 0: raise ValueError("Unable to find values for the longitude dimension in %s" % (filename)) - if len(lon_name) > 1: - tmp_name = [name for name in lon_name if np.any([name in attr for attr in attrs])] - if len(tmp_name) > 0: lon_name = tmp_name - - # Time dimension - if len(time_name) == 1: - time_name = time_name[0] - time_bnd_name = grp.variables[time_name].bounds if (time_name in grp.variables and - "bounds" in grp.variables[time_name].ncattrs()) else None - if time_bnd_name not in grp.variables: time_bnd_name = None - elif len(time_name) >= 1: - raise ValueError("Ambiguous choice of values for the time dimension [%s] in %s" % (",".join(time_name),filename)) - else: - time_name = None - time_bnd_name = None - - # Lat dimension - if len(lat_name) == 1: - lat_name = lat_name[0] - lat_bnd_name = grp.variables[lat_name].bounds if (lat_name in grp.variables and - "bounds" in grp.variables[lat_name].ncattrs()) else None - if lat_bnd_name not in grp.variables: lat_bnd_name = None - elif len(lat_name) >= 1: - raise ValueError("Ambiguous choice of values for the latitude dimension [%s] in %s" % (",".join(lat_name),filename)) - else: - lat_name = None - lat_bnd_name = None - - # Lon dimension - if len(lon_name) == 1: - lon_name = lon_name[0] - lon_bnd_name = grp.variables[lon_name].bounds if (lon_name in grp.variables and - "bounds" in grp.variables[lon_name].ncattrs()) else None - if lon_bnd_name not in grp.variables: lon_bnd_name = None - elif len(lon_name) >= 1: - raise ValueError("Ambiguous choice of values for the longitude dimension [%s] in %s" % (",".join(lon_name),filename)) - else: - lon_name = None - lon_bnd_name = None - - # Data dimension - if len(data_name) == 1: - data_name = data_name[0] - elif len(data_name) >= 1: - raise ValueError("Ambiguous choice of values for the data dimension [%s] in %s" % (",".join(data_name),filename)) - else: - data_name = None - - # The layered dimension is whatever is leftover since its name - # could be many things - if len(missed) == 1: - depth_name = missed[0] - depth_bnd_name = grp.variables[depth_name].bounds if (depth_name in grp.variables and - "bounds" in grp.variables[depth_name].ncattrs()) else None - if depth_bnd_name not in grp.variables: depth_bnd_name = None - elif len(missed) >= 1: - raise ValueError("Ambiguous choice of values for the layered dimension [%s] in %s" % (",".join(missed),filename)) - else: - depth_name = None - depth_bnd_name = None - - # Based on present values, get dimensions and bounds - t = None; t_bnd = None - lat = None; lat_bnd = None - lon = None; lon_bnd = None - depth = None; depth_bnd 
= None - data = None; - cbounds = None - if time_name is not None: - if time_bnd_name is None: - t = ConvertCalendar(grp.variables[time_name]) - else: - t,t_bnd = ConvertCalendar(grp.variables[time_name],grp.variables[time_bnd_name]) - if "climatology" in grp.variables[time_name].ncattrs(): - cbounds = grp.variables[grp.variables[time_name].climatology] - if not np.allclose(cbounds.shape,[12,2]): - raise RuntimeError("ILAMB only supports annual cycle style climatologies") - cbounds = np.round(cbounds[0,:]/365.+1850.) - if lat_name is not None: lat = grp.variables[lat_name] [...] - if lat_bnd_name is not None: lat_bnd = grp.variables[lat_bnd_name] [...] - if lon_name is not None: lon = grp.variables[lon_name] [...] - if lon_bnd_name is not None: lon_bnd = grp.variables[lon_bnd_name] [...] - if depth_name is not None: - dunit = None - if "units" in grp.variables[depth_name].ncattrs(): dunit = grp.variables[depth_name].units - depth = grp.variables[depth_name][...] - if depth_bnd_name is not None: - depth_bnd = grp.variables[depth_bnd_name][...] - if dunit is not None: - if not Unit(dunit).is_convertible(Unit("m")): - raise ValueError("Non-linear units [%s] of the layered dimension [%s] in %s" % (dunit,depth_name,filename)) - depth = Unit(dunit).convert(depth,Unit("m"),inplace=True) - if depth_bnd is not None: -<<<<<<< HEAD - depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) - -||||||| merged common ancestors - depth_bnd = Units.conform(depth_bnd,Units(dunit),Units("m"),inplace=True) - -======= - depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) ->>>>>>> ncar_pylib_ch - if data_name is not None: - data = len(grp.dimensions[data_name]) - # if we have data sites, there may be lat/lon data to come - # along with them although not a dimension of the variable - for key in grp.variables.keys(): - if "lat" in key: lat_name = key - if "lon" in key: lon_name = key - if lat_name is not None: lat = grp.variables[lat_name][...] - if lon_name is not None: lon = grp.variables[lon_name][...] - if lat.size != data: lat = None - if lon.size != data: lon = None - - # read in data array, roughly subset in time if bounds are - # provided for added effciency - if (t is not None) and (t0 is not None or tf is not None): - begin = 0; end = t.size - if t0 is not None: begin = max(t.searchsorted(t0)-1,begin) - if tf is not None: end = min(t.searchsorted(tf)+1,end) - v = var[begin:end,...] - t = t [begin:end] - if t_bnd is not None: - t_bnd = t_bnd[begin:end,:] - else: - v = var[...] 
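The rough subsetting above only narrows the read window: times are expressed in days since 1850-01-01 and one extra interval of padding is kept on either side. A small illustration with a made-up monthly axis and a hypothetical 1980-2000 request:

    import numpy as np

    t  = np.arange(0.0, 60000.0, 30.0)          # fake monthly axis, days since 1850-01-01
    t0 = (1980 - 1850) * 365.0                  # requested start
    tf = (2000 - 1850) * 365.0                  # requested end
    begin = max(t.searchsorted(t0) - 1, 0)      # pad by one interval on each side
    end   = min(t.searchsorted(tf) + 1, t.size)
    subset = t[begin:end]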
- - # If lat and lon are 2D, then we will need to interpolate things - if lat is not None and lon is not None: - if lat.ndim == 2 and lon.ndim == 2: - assert lat.shape == lon.shape - - # Create the grid - res = 1.0 - lat_bnds = np.arange(round(lat.min(),0), - round(lat.max(),0)+res/2.,res) - lon_bnds = np.arange(round(lon.min(),0), - round(lon.max(),0)+res/2.,res) - lats = 0.5*(lat_bnds[:-1]+lat_bnds[1:]) - lons = 0.5*(lon_bnds[:-1]+lon_bnds[1:]) - ilat,ilon = ComputeIndexingArrays(lat,lon,lats,lons) - r = np.sqrt( (lat[ilat,ilon]-lats[:,np.newaxis])**2 + - (lon[ilat,ilon]-lons[np.newaxis,:])**2 ) - v = v[...,ilat,ilon] - v = np.ma.masked_array(v,mask=v.mask+(r>2*res)) - lat = lats - lon = lons - lat_bnd = np.zeros((lat.size,2)) - lat_bnd[:,0] = lat_bnds[:-1] - lat_bnd[:,1] = lat_bnds[+1:] - lon_bnd = lon_bnds - lon_bnd = np.zeros((lon.size,2)) - lon_bnd[:,0] = lon_bnds[:-1] - lon_bnd[:,1] = lon_bnds[+1:] - - # handle incorrect or absent masking of arrays - if type(v) != type(np.ma.empty(1)): - mask = np.zeros(v.shape,dtype=int) - if "_FillValue" in var.ncattrs(): mask += (np.abs(v-var._FillValue )<1e-12) - if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12) - v = np.ma.masked_array(v,mask=mask,copy=False) - - if "units" in var.ncattrs(): - units = FixDumbUnits(var.units) - else: - units = "1" - dset.close() - - return v,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data - -def Score(var,normalizer): - """Remaps a normalized variable to the interval [0,1]. - - Parameters - ---------- - var : ILAMB.Variable.Variable - The variable to normalize, usually represents an error of some sort - normalizer : ILAMB.Variable.Variable - The variable by which we normalize - """ - from Variable import Variable - name = var.name.replace("bias","bias_score") - name = name.replace("diff","diff_score") - name = name.replace("rmse","rmse_score") - name = name.replace("iav" ,"iav_score") - np.seterr(over='ignore',under='ignore') - data = np.exp(-np.abs(var.data/normalizer.data)) - data[data<1e-16] = 0. - np.seterr(over='raise',under='raise') - return Variable(name = name, - data = data, - unit = "1", - ndata = var.ndata, - lat = var.lat, lat_bnds = var.lat_bnds, - lon = var.lon, lon_bnds = var.lon_bnds, - area = var.area) - -def ComposeSpatialGrids(var1,var2): - """Creates a grid which conforms the boundaries of both variables. - - This routine takes the union of the latitude and longitude - cell boundaries of both grids and returns a new set of - latitudes and longitudes which represent cell centers of the - new grid. 
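The composite grid is simply the union of both sets of cell interfaces, with centroids taken at the midpoints; a toy sketch with two made-up boundary arrays:

    import numpy as np

    bnd1 = np.array([-90.0, -45.0, 0.0, 45.0, 90.0])   # hypothetical interfaces, grid 1
    bnd2 = np.array([-90.0, -30.0, 30.0, 90.0])        # hypothetical interfaces, grid 2
    bnd  = np.unique(np.hstack((bnd1, bnd2)))          # union of the interfaces
    cent = 0.5 * (bnd[:-1] + bnd[1:])                  # centroids of the composite grid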
- - Parameters - ---------- - var1,var2 : ILAMB.Variable.Variable - The two variables for which we wish to find a common grid - - Returns - ------- - lat : numpy.ndarray - a 1D array of latitudes of cell centroids - lon : numpy.ndarray - a 1D array of longitudes of cell centroids - """ - if not var1.spatial: il.NotSpatialVariable() - if not var2.spatial: il.NotSpatialVariable() - def _make_bnds(x): - bnds = np.zeros(x.size+1) - bnds[1:-1] = 0.5*(x[1:]+x[:-1]) - bnds[ 0] = max(x[ 0]-0.5*(x[ 1]-x[ 0]),-180) - bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) - return bnds - lat1_bnd = _make_bnds(var1.lat) - lon1_bnd = _make_bnds(var1.lon) - lat2_bnd = _make_bnds(var2.lat) - lon2_bnd = _make_bnds(var2.lon) - lat_bnd = np.hstack((lat1_bnd,lat2_bnd)); lat_bnd.sort(); lat_bnd = np.unique(lat_bnd) - lon_bnd = np.hstack((lon1_bnd,lon2_bnd)); lon_bnd.sort(); lon_bnd = np.unique(lon_bnd) - lat = 0.5*(lat_bnd[1:]+lat_bnd[:-1]) - lon = 0.5*(lon_bnd[1:]+lon_bnd[:-1]) - return lat,lon - -def ScoreSeasonalCycle(phase_shift): - """Computes the seasonal cycle score from the phase shift. - - Possible remove this function as we do not compute other score - components via a ilamblib function. - """ - from Variable import Variable - return Variable(data = (1+np.cos(np.abs(phase_shift.data)/365*2*np.pi))*0.5, - unit = "1", - name = phase_shift.name.replace("phase_shift","phase_shift_score"), - ndata = phase_shift.ndata, - lat = phase_shift.lat, - lon = phase_shift.lon, - area = phase_shift.area) - - -def _composeGrids(v1,v2): - lat_bnds = np.unique(np.hstack([v1.lat_bnds.flatten(),v2.lat_bnds.flatten()])) - lon_bnds = np.unique(np.hstack([v1.lon_bnds.flatten(),v2.lon_bnds.flatten()])) - lat_bnds = lat_bnds[(lat_bnds>=- 90)*(lat_bnds<=+ 90)] - lon_bnds = lon_bnds[(lon_bnds>=-180)*(lon_bnds<=+180)] - lat_bnds = np.vstack([lat_bnds[:-1],lat_bnds[+1:]]).T - lon_bnds = np.vstack([lon_bnds[:-1],lon_bnds[+1:]]).T - lat = lat_bnds.mean(axis=1) - lon = lon_bnds.mean(axis=1) - return lat,lon,lat_bnds,lon_bnds - -<<<<<<< HEAD -def AnalysisMeanStateSites(ref,com,**keywords): -||||||| merged common ancestors -def AnalysisMeanState(ref,com,**keywords): -======= -def AnalysisMeanState(ref,com,**keywords): - ->>>>>>> ncar_pylib_ch - """Perform a mean state analysis. - - This mean state analysis examines the model mean state in space - and time. We compute the mean variable value over the time period - at each spatial cell or data site as appropriate, as well as the - bias and RMSE relative to the observational variable. We will - output maps of the period mean values and bias. For each spatial - cell or data site we also estimate the phase of the variable by - finding the mean time of year when the maximum occurs and the - phase shift by computing the difference in phase with respect to - the observational variable. In the spatial dimension, we compute a - spatial mean for each of the desired regions and an average annual - cycle. 
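The phase portion of this analysis relies on ScoreSeasonalCycle above, which maps a shift of 0 days to a score of 1 and a half-year shift to a score of 0. A quick numerical check of that formula with assumed shifts:

    import numpy as np

    shift_days = np.array([0.0, 91.25, 182.5])   # assumed phase shifts in days
    score = 0.5 * (1.0 + np.cos(np.abs(shift_days) / 365.0 * 2.0 * np.pi))
    # score is approximately [1.0, 0.5, 0.0]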
- - Parameters - ---------- - obs : ILAMB.Variable.Variable - the observational (reference) variable - mod : ILAMB.Variable.Variable - the model (comparison) variable - regions : list of str, optional - the regions overwhich to apply the analysis - dataset : netCDF4.Dataset, optional - a open dataset in write mode for caching the results of the - analysis which pertain to the model - benchmark_dataset : netCDF4.Dataset, optional - a open dataset in write mode for caching the results of the - analysis which pertain to the observations - space_mean : bool, optional - disable to compute sums of the variable over space instead of - mean values - table_unit : str, optional - the unit to use when displaying output in tables on the HTML page - plots_unit : str, optional - the unit to use when displaying output on plots on the HTML page - - """ - - from Variable import Variable - - regions = keywords.get("regions" ,["global"]) - dataset = keywords.get("dataset" ,None) - benchmark_dataset = keywords.get("benchmark_dataset",None) - space_mean = keywords.get("space_mean" ,True) - table_unit = keywords.get("table_unit" ,None) - plot_unit = keywords.get("plot_unit" ,None) - mass_weighting = keywords.get("mass_weighting" ,False) - skip_rmse = keywords.get("skip_rmse" ,False) - skip_iav = keywords.get("skip_iav" ,False) - skip_cycle = keywords.get("skip_cycle" ,False) - res = keywords.get("res" ,0.5) - ILAMBregions = Regions() - spatial = False - normalizer = None - - # Only study the annual cycle if it makes sense - if not ref.monthly: skip_cycle = True - if ref.time.size < 12: skip_cycle = True -<<<<<<< HEAD - if skip_rmse : skip_iav = True - -||||||| merged common ancestors - - # We find -======= - if skip_rmse : skip_iav = True - - # In order to deal with differences in grids and representation of - # land, we will interpolate both datasets to a fixed - # resolution. While not perfect we interpolate to a grid - # resolution higher than models are typically run. By convention, - # I will use capital letters for the interpolated quantities. 
- ->>>>>>> ncar_pylib_ch - if spatial: - junk,junk,lat,lon = GlobalLatLonGrid(res) - REF = ref.interpolate(lat=lat,lon=lon,itype='bilinear') - COM = com.interpolate(lat=lat,lon=lon,itype='bilinear') - - # We find the mean values over the time period on the original - # grid/datasites of each dataset - ref_timeint = ref.integrateInTime(mean=True) - com_timeint = com.integrateInTime(mean=True) - if spatial: - REF_timeint = REF.integrateInTime(mean=True) - COM_timeint = COM.integrateInTime(mean=True) - else: - REF = ref - COM = com - REF_timeint = ref_timeint - COM_timeint = com_timeint - if mass_weighting: normalizer = REF_timeint.data - - # Compute the bias, RMSE, and RMS maps using the interpolated - # quantities - bias = REF_timeint.bias(COM_timeint) - cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, - data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), - time = REF.time, time_bnds = REF.time_bnds, - lat = REF.lat , lat_bnds = REF.lat_bnds, - lon = REF.lon , lon_bnds = REF.lon_bnds, - area = REF.area, ndata = REF.ndata) - crms = cREF.rms () - bias_score_map = Score(bias,crms) - if spatial: - bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask - bias_score_map = Score(bias,REF_timeint) - if not skip_rmse: - cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit, - data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), - time = COM.time, time_bnds = COM.time_bnds, - lat = COM.lat , lat_bnds = COM.lat_bnds, - lon = COM.lon , lon_bnds = COM.lon_bnds, - area = COM.area, ndata = COM.ndata) - rmse = REF.rmse( COM) - crmse = cREF.rmse(cCOM) - rmse_score_map = Score(crmse,crms) - if not skip_iav: - ref_iav = Variable(name = "centralized %s" % ref.name, unit = ref.unit, - data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask), - time = ref.time, time_bnds = ref.time_bnds, - lat = ref.lat , lat_bnds = ref.lat_bnds, - lon = ref.lon , lon_bnds = ref.lon_bnds, - area = ref.area, ndata = ref.ndata).rms() - com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit, - data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask), - time = com.time, time_bnds = com.time_bnds, - lat = com.lat , lat_bnds = com.lat_bnds, - lon = com.lon , lon_bnds = com.lon_bnds, - area = com.area, ndata = com.ndata).rms() - REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit, - data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), - time = REF.time, time_bnds = REF.time_bnds, - lat = REF.lat , lat_bnds = REF.lat_bnds, - lon = REF.lon , lon_bnds = REF.lon_bnds, - area = REF.area, ndata = REF.ndata).rms() - COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit, - data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), - time = COM.time, time_bnds = COM.time_bnds, - lat = COM.lat , lat_bnds = COM.lat_bnds, - lon = COM.lon , lon_bnds = COM.lon_bnds, - area = COM.area, ndata = COM.ndata).rms() - iav_score_map = Score(Variable(name = "diff %s" % REF.name, unit = REF.unit, - data = (COM_iav.data-REF_iav.data), - lat = REF.lat , lat_bnds = REF.lat_bnds, - lon = REF.lon , lon_bnds = REF.lon_bnds, - area = REF.area, ndata = REF.ndata), - REF_iav) - - # The phase shift comes from the interpolated quantities - if not skip_cycle: - ref_cycle = REF.annualCycle() - com_cycle = COM.annualCycle() - ref_maxt_map = 
ref_cycle.timeOfExtrema(etype="max") - com_maxt_map = com_cycle.timeOfExtrema(etype="max") - shift_map = ref_maxt_map.phaseShift(com_maxt_map) - shift_score_map = ScoreSeasonalCycle(shift_map) - shift_map.data /= 30.; shift_map.unit = "months" - - # Scalars - ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {} - com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {} - bias_val = {}; bias_score = {}; rmse_val = {}; rmse_score = {} - space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {} - ref_union_mean = {}; ref_comp_mean = {} - com_union_mean = {}; com_comp_mean = {} - for region in regions: - if spatial: - ref_period_mean[region] = REF_timeint .integrateInSpace(region=region,mean=space_mean) - ref_spaceint [region] = REF .integrateInSpace(region=region,mean=True) - com_period_mean[region] = COM_timeint .integrateInSpace(region=region,mean=space_mean) - com_spaceint [region] = COM .integrateInSpace(region=region,mean=True) - bias_val [region] = bias .integrateInSpace(region=region,mean=space_mean) - bias_score [region] = bias_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) - if not skip_cycle: - ref_mean_cycle[region] = ref_cycle .integrateInSpace(region=region,mean=True) - ref_dtcycle [region] = deepcopy(ref_mean_cycle[region]) - ref_dtcycle [region].data -= ref_mean_cycle[region].data.mean() - com_mean_cycle[region] = com_cycle .integrateInSpace(region=region,mean=True) - com_dtcycle [region] = deepcopy(com_mean_cycle[region]) - com_dtcycle [region].data -= com_mean_cycle[region].data.mean() - shift [region] = shift_map .integrateInSpace(region=region,mean=True,intabs=True) - shift_score [region] = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - if not skip_rmse: - rmse_val [region] = rmse .integrateInSpace(region=region,mean=space_mean) - rmse_score [region] = rmse_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) - if not skip_iav: - iav_score [region] = iav_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) - space_std[region],space_cor[region],sd_score[region] = REF_timeint.spatialDistribution(COM_timeint,region=region) - else: - ref_period_mean[region] = ref_timeint .siteStats(region=region) - ref_spaceint [region] = ref .siteStats(region=region) - com_period_mean[region] = com_timeint .siteStats(region=region) - com_spaceint [region] = com .siteStats(region=region) - bias_val [region] = bias .siteStats(region=region) - bias_score [region] = bias_score_map .siteStats(region=region,weight=normalizer) - if not skip_cycle: - ref_mean_cycle [region] = ref_cycle .siteStats(region=region) - ref_dtcycle [region] = deepcopy(ref_mean_cycle[region]) - ref_dtcycle [region].data -= ref_mean_cycle[region].data.mean() - com_mean_cycle [region] = com_cycle .siteStats(region=region) - com_dtcycle [region] = deepcopy(com_mean_cycle[region]) - com_dtcycle [region].data -= com_mean_cycle[region].data.mean() - shift [region] = shift_map .siteStats(region=region,intabs=True) - shift_score [region] = shift_score_map.siteStats(region=region,weight=normalizer) - if not skip_rmse: - rmse_val [region] = rmse .siteStats(region=region) - rmse_score [region] = rmse_score_map .siteStats(region=region,weight=normalizer) - if not skip_iav: - iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer) - - ref_period_mean[region].name = "Period Mean %s" % (region) - ref_spaceint [region].name = 
"spaceint_of_%s_over_%s" % (ref.name,region) - com_period_mean[region].name = "Period Mean %s" % (region) - com_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) - bias_val [region].name = "Bias %s" % (region) - bias_score [region].name = "Bias Score %s" % (region) - if not skip_rmse: - rmse_val [region].name = "RMSE %s" % (region) - rmse_score [region].name = "RMSE Score %s" % (region) - if not skip_iav: - iav_score [region].name = "Interannual Variability Score %s" % (region) - if not skip_cycle: - ref_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) - ref_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) - com_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) - com_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) - shift [region].name = "Phase Shift %s" % (region) - shift_score [region].name = "Seasonal Cycle Score %s" % (region) - if spatial: - sd_score [region].name = "Spatial Distribution Score %s" % (region) - - # Unit conversions - def _convert(var,unit): - if type(var) == type({}): - for key in var.keys(): var[key].convert(unit) - else: - var.convert(unit) - - if table_unit is not None: - for var in [ref_period_mean,com_period_mean,bias_val,rmse_val]: - _convert(var,table_unit) - if plot_unit is not None: - plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val] - if not skip_rmse: plot_vars += [rmse,rmse_val] - if not skip_cycle: plot_vars += [com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] - if not skip_iav: plot_vars += [com_iav] - for var in plot_vars: _convert(var,plot_unit) - - # Rename and optionally dump out information to netCDF4 files - com_timeint .name = "timeint_of_%s" % ref.name - bias .name = "bias_map_of_%s" % ref.name - bias_score_map .name = "biasscore_map_of_%s" % ref.name - - out_vars = [com_period_mean, - com_timeint, - com_mean_cycle, - com_dtcycle, - bias, - bias_score_map, - bias_val, - bias_score, - shift, - shift_score] - if com_spaceint[com_spaceint.keys()[0]].data.size > 1: out_vars.append(com_spaceint) - if spatial: - COM_timeint.name = "timeintremap_of_%s" % ref.name - out_vars.append(COM_timeint) - if not skip_cycle: - com_maxt_map .name = "phase_map_of_%s" % ref.name - shift_map .name = "shift_map_of_%s" % ref.name - shift_score_map.name = "shiftscore_map_of_%s" % ref.name - out_vars.append(com_maxt_map) - out_vars.append(shift_map) - out_vars.append(shift_score_map) - if not skip_rmse: - rmse .name = "rmse_map_of_%s" % ref.name - rmse_score_map.name = "rmsescore_map_of_%s" % ref.name - out_vars.append(rmse) - out_vars.append(rmse_score_map) - out_vars.append(rmse_val) - out_vars.append(rmse_score) - if not skip_iav: - com_iav.name = "iav_map_of_%s" % ref.name - iav_score_map.name = "iavscore_map_of_%s" % ref.name - out_vars.append(com_iav) - out_vars.append(iav_score_map) - out_vars.append(iav_score) - if dataset is not None: - for var in out_vars: - if type(var) == type({}): - for key in var.keys(): var[key].toNetCDF4(dataset,group="MeanState") - else: - var.toNetCDF4(dataset,group="MeanState") - for key in sd_score.keys(): - sd_score[key].toNetCDF4(dataset,group="MeanState", - attributes={"std":space_std[key].data, - "R" :space_cor[key].data}) - - # Rename and optionally dump out information to netCDF4 files - out_vars = [ref_period_mean,ref_timeint] - if ref_spaceint[ref_spaceint.keys()[0]].data.size > 1: out_vars.append(ref_spaceint) - ref_timeint .name = "timeint_of_%s" % ref.name - if not skip_cycle: - ref_maxt_map.name = 
"phase_map_of_%s" % ref.name - out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle] -<<<<<<< HEAD - if not skip_iav: - ref_iav.name = "iav_map_of_%s" % ref.name - out_vars.append(ref_iav) -||||||| merged common ancestors -======= - if not skip_iav: - ref_iav.name = "iav_map_of_%s" % ref.name - out_vars.append(ref_iav) - if spatial: - REF_timeint.name = "timeintremap_of_%s" % ref.name - out_vars.append(REF_timeint) ->>>>>>> ncar_pylib_ch - if benchmark_dataset is not None: - for var in out_vars: - if type(var) == type({}): - for key in var.keys(): var[key].toNetCDF4(benchmark_dataset,group="MeanState") - else: - var.toNetCDF4(benchmark_dataset,group="MeanState") - - return -<<<<<<< HEAD - - -def AnalysisMeanStateSpace(ref,com,**keywords): - """Perform a mean state analysis. - - This mean state analysis examines the model mean state in space - and time. We compute the mean variable value over the time period - at each spatial cell or data site as appropriate, as well as the - bias and RMSE relative to the observational variable. We will - output maps of the period mean values and bias. For each spatial - cell or data site we also estimate the phase of the variable by - finding the mean time of year when the maximum occurs and the - phase shift by computing the difference in phase with respect to - the observational variable. In the spatial dimension, we compute a - spatial mean for each of the desired regions and an average annual - cycle. -||||||| merged common ancestors - - -def AnalysisRelationship(dep_var,ind_var,dataset,rname,**keywords): - """Perform a relationship analysis. - - Expand to provide details of what exactly is done. -======= - - -def AnalysisMeanStateSpace(ref,com,**keywords): - """Perform a mean state analysis. - - This mean state analysis examines the model mean state in space - and time. We compute the mean variable value over the time period - at each spatial cell or data site as appropriate, as well as the - bias and RMSE relative to the observational variable. We will - output maps of the period mean values and bias. For each spatial - cell or data site we also estimate the phase of the variable by - finding the mean time of year when the maximum occurs and the - phase shift by computing the difference in phase with respect to - the observational variable. In the spatial dimension, we compute a - spatial mean for each of the desired regions and an average annual - cycle. 
->>>>>>> ncar_pylib_ch - - Parameters - ---------- - obs : ILAMB.Variable.Variable - the observational (reference) variable - mod : ILAMB.Variable.Variable - the model (comparison) variable - regions : list of str, optional - the regions overwhich to apply the analysis - dataset : netCDF4.Dataset, optional - a open dataset in write mode for caching the results of the - analysis which pertain to the model - benchmark_dataset : netCDF4.Dataset, optional - a open dataset in write mode for caching the results of the - analysis which pertain to the observations - space_mean : bool, optional - disable to compute sums of the variable over space instead of - mean values - table_unit : str, optional - the unit to use when displaying output in tables on the HTML page - plots_unit : str, optional - the unit to use when displaying output on plots on the HTML page - - """ - from Variable import Variable - regions = keywords.get("regions" ,["global"]) - dataset = keywords.get("dataset" ,None) - benchmark_dataset = keywords.get("benchmark_dataset",None) - space_mean = keywords.get("space_mean" ,True) - table_unit = keywords.get("table_unit" ,None) - plot_unit = keywords.get("plot_unit" ,None) - mass_weighting = keywords.get("mass_weighting" ,False) - skip_rmse = keywords.get("skip_rmse" ,False) - skip_iav = keywords.get("skip_iav" ,False) - skip_cycle = keywords.get("skip_cycle" ,False) - ILAMBregions = Regions() - spatial = ref.spatial - - # Convert str types to booleans - if type(skip_rmse) == type(""): - skip_rmse = (skip_rmse.lower() == "true") - if type(skip_iav ) == type(""): - skip_iav = (skip_iav .lower() == "true") - if type(skip_cycle) == type(""): - skip_cycle = (skip_cycle.lower() == "true") - - # Check if we need to skip parts of the analysis - if not ref.monthly : skip_cycle = True - if ref.time.size < 12: skip_cycle = True - if ref.time.size == 1: skip_rmse = True - if skip_rmse : skip_iav = True - name = ref.name - - # Interpolate both reference and comparison to a grid composed of - # their cell breaks - ref.convert(plot_unit) - com.convert(plot_unit) - lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) - REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) - COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) - unit = REF.unit - area = REF.area - ndata = REF.ndata - - # Find the mean values over the time period - ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) - com_timeint = com.integrateInTime(mean=True).convert(plot_unit) - REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) - COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) - normalizer = REF_timeint.data if mass_weighting else None - - # Report period mean values over all possible representations of - # land - ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) - ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) - com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) - if benchmark_dataset is not None: - - ref_timeint.name = "timeint_of_%s" % name - ref_timeint.toNetCDF4(benchmark_dataset,group="MeanState") - for region in regions: - - # reference period mean on original grid - ref_period_mean = ref_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) - ref_period_mean.name = "Period Mean (original grids) %s" % region - ref_period_mean.toNetCDF4(benchmark_dataset,group="MeanState") - - if dataset is not None: - - com_timeint.name = 
"timeint_of_%s" % name - com_timeint.toNetCDF4(dataset,group="MeanState") - for region in regions: - - # reference period mean on intersection of land - ref_union_mean = Variable(name = "REF_and_com", unit = REF_timeint.unit, - data = np.ma.masked_array(REF_timeint.data,mask=(ref_and_com==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - ref_union_mean.name = "Benchmark Period Mean (intersection) %s" % region - ref_union_mean.toNetCDF4(dataset,group="MeanState") - - # reference period mean on complement of land - ref_comp_mean = Variable(name = "REF_not_com", unit = REF_timeint.unit, - data = np.ma.masked_array(REF_timeint.data,mask=(ref_not_com==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - ref_comp_mean.name = "Benchmark Period Mean (complement) %s" % region - ref_comp_mean.toNetCDF4(dataset,group="MeanState") - - # comparison period mean on original grid - com_period_mean = com_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) - com_period_mean.name = "Period Mean (original grids) %s" % region - com_period_mean.toNetCDF4(dataset,group="MeanState") - - # comparison period mean on intersection of land - com_union_mean = Variable(name = "ref_and_COM", unit = COM_timeint.unit, - data = np.ma.masked_array(COM_timeint.data,mask=(ref_and_com==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - com_union_mean.name = "Model Period Mean (intersection) %s" % region - com_union_mean.toNetCDF4(dataset,group="MeanState") - - # comparison period mean on complement of land - com_comp_mean = Variable(name = "COM_not_ref", unit = COM_timeint.unit, - data = np.ma.masked_array(COM_timeint.data,mask=(com_not_ref==False)), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) - com_comp_mean.name = "Model Period Mean (complement) %s" % region - com_comp_mean.toNetCDF4(dataset,group="MeanState") - - # Now that we are done reporting on the intersection / complement, - # set all masks to the intersection - REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) - COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) - REF_timeint.data.mask = (ref_and_com==False) - COM_timeint.data.mask = (ref_and_com==False) - if mass_weighting: normalizer.mask = (ref_and_com==False) - - # Spatial Distribution: scalars and scores - if dataset is not None: - for region in regions: - space_std,space_cor,sd_score = REF_timeint.spatialDistribution(COM_timeint,region=region) - sd_score.name = "Spatial Distribution Score %s" % region - sd_score.toNetCDF4(dataset,group="MeanState", - attributes={"std":space_std.data, - "R" :space_cor.data}) - - # Cycle: maps, scalars, and scores - if not skip_cycle: - ref_cycle = REF.annualCycle() - ref_maxt_map = ref_cycle.timeOfExtrema(etype="max") - ref_maxt_map.name = "phase_map_of_%s" % name - com_cycle = COM.annualCycle() - com_maxt_map = com_cycle.timeOfExtrema(etype="max") - com_maxt_map.name = "phase_map_of_%s" % name - shift_map = ref_maxt_map.phaseShift(com_maxt_map) - shift_map.name = "shift_map_of_%s" % name - 
shift_score_map = ScoreSeasonalCycle(shift_map) - shift_score_map.name = "shiftscore_map_of_%s" % name - shift_map.data /= 30.; shift_map.unit = "months" - if benchmark_dataset is not None: - ref_maxt_map.toNetCDF4(benchmark_dataset,group="MeanState") - for region in regions: - ref_mean_cycle = ref_cycle.integrateInSpace(region=region,mean=True) - ref_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) - ref_mean_cycle.toNetCDF4(benchmark_dataset,group="MeanState") - ref_dtcycle = deepcopy(ref_mean_cycle) - ref_dtcycle.data -= ref_mean_cycle.data.mean() - ref_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) - ref_dtcycle.toNetCDF4(benchmark_dataset,group="MeanState") - if dataset is not None: - com_maxt_map.toNetCDF4(dataset,group="MeanState") - shift_map .toNetCDF4(dataset,group="MeanState") - shift_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - com_mean_cycle = com_cycle.integrateInSpace(region=region,mean=True) - com_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) - com_mean_cycle.toNetCDF4(dataset,group="MeanState") - com_dtcycle = deepcopy(com_mean_cycle) - com_dtcycle.data -= com_mean_cycle.data.mean() - com_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) - com_dtcycle.toNetCDF4(dataset,group="MeanState") - shift = shift_map.integrateInSpace(region=region,mean=True,intabs=True) - shift_score = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - shift .name = "Phase Shift %s" % region - shift .toNetCDF4(dataset,group="MeanState") - shift_score.name = "Seasonal Cycle Score %s" % region - shift_score.toNetCDF4(dataset,group="MeanState") - - del ref_cycle,com_cycle,shift_map,shift_score_map - - # Bias: maps, scalars, and scores - bias = REF_timeint.bias(COM_timeint).convert(plot_unit) - cREF = Variable(name = "centralized %s" % name, unit = REF.unit, - data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), - time = REF.time, time_bnds = REF.time_bnds, ndata = REF.ndata, - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = REF.area).convert(plot_unit) - REF_iav = cREF.rms() - if skip_rmse: del cREF - bias_score_map = Score(bias,REF_iav if REF.time.size > 1 else REF_timeint) - bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask - if dataset is not None: - bias.name = "bias_map_of_%s" % name - bias.toNetCDF4(dataset,group="MeanState") - bias_score_map.name = "biasscore_map_of_%s" % name - bias_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) - bias_val.name = "Bias %s" % region - bias_val.toNetCDF4(dataset,group="MeanState") - bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - bias_score.name = "Bias Score %s" % region - bias_score.toNetCDF4(dataset,group="MeanState") - del bias,bias_score_map - - # Spatial mean: plots - if REF.time.size > 1: - if benchmark_dataset is not None: - for region in regions: - ref_spaceint = REF.integrateInSpace(region=region,mean=True) - ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) - ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") - if dataset is not None: - for region in regions: - com_spaceint = COM.integrateInSpace(region=region,mean=True) - com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) - com_spaceint.toNetCDF4(dataset,group="MeanState") - - # RMSE: maps, scalars, and scores - if not skip_rmse: - 
rmse = REF.rmse(COM).convert(plot_unit) - del REF - cCOM = Variable(name = "centralized %s" % name, unit = unit, - data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), - time = COM.time, time_bnds = COM.time_bnds, - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = COM.area, ndata = COM.ndata).convert(plot_unit) - del COM - crmse = cREF.rmse(cCOM).convert(plot_unit) - del cREF - if skip_iav: del cCOM - rmse_score_map = Score(crmse,REF_iav) - if dataset is not None: - rmse.name = "rmse_map_of_%s" % name - rmse.toNetCDF4(dataset,group="MeanState") - rmse_score_map.name = "rmsescore_map_of_%s" % name - rmse_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) - rmse_val.name = "RMSE %s" % region - rmse_val.toNetCDF4(dataset,group="MeanState") - rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - rmse_score.name = "RMSE Score %s" % region - rmse_score.toNetCDF4(dataset,group="MeanState") - del rmse,crmse,rmse_score_map - - # IAV: maps, scalars, scores - if not skip_iav: - COM_iav = cCOM.rms() - del cCOM - iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit, - data = (COM_iav.data-REF_iav.data), - lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, - area = area, ndata = ndata), - REF_iav) - if benchmark_dataset is not None: - REF_iav.name = "iav_map_of_%s" % name - REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") - if dataset is not None: - COM_iav.name = "iav_map_of_%s" % name - COM_iav.toNetCDF4(dataset,group="MeanState") - iav_score_map.name = "iavscore_map_of_%s" % name - iav_score_map.toNetCDF4(dataset,group="MeanState") - for region in regions: - iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) - iav_score.name = "Interannual Variability Score %s" % region - iav_score.toNetCDF4(dataset,group="MeanState") - del COM_iav,iav_score_map - del REF_iav - - return - -def ClipTime(v,t0,tf): - """Remove time from a variable based on input bounds. - - Parameters - ---------- - v : ILAMB.Variable.Variable - the variable to trim - t0,tf : float - the times at which to trim - - Returns - ------- - vtrim : ILAMB.Variable.Variable - the trimmed variable - """ - begin = np.argmin(np.abs(v.time_bnds[:,0]-t0)) - end = np.argmin(np.abs(v.time_bnds[:,1]-tf)) - while v.time_bnds[begin,0] > t0: - begin -= 1 - if begin <= 0: - begin = 0 - break - while v.time_bnds[end, 1] < tf: - end += 1 - if end >= v.time.size-1: - end = v.time.size-1 - break - v.time = v.time [begin:(end+1) ] - v.time_bnds = v.time_bnds[begin:(end+1),...] - v.data = v.data [begin:(end+1),...] - return v - -def MakeComparable(ref,com,**keywords): - r"""Make two variables comparable. - - Given a reference variable and a comparison variable, make the two - variables comparable or raise an exception explaining why they are - not. 
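In practice the reference and comparison are reconciled with a single call before any analysis is attempted; a hypothetical invocation (variable names and keyword values are illustrative, not defaults):

    # obs and mod are ILAMB.Variable.Variable objects; names and values are illustrative
    obs, mod = MakeComparable(obs, mod,
                              clip_ref  = True,    # also trim the reference to the overlap period
                              mask_ref  = True,    # project the comparison mask onto the reference
                              logstring = "[gpp][GBAF]")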
- - Parameters - ---------- - ref : ILAMB.Variable.Variable - the reference variable object - com : ILAMB.Variable.Variable - the comparison variable object - clip_ref : bool, optional - enable in order to clip the reference variable time using the - limits of the comparison variable (defult is False) - mask_ref : bool, optional - enable in order to mask the reference variable using an - interpolation of the comparison variable (defult is False) - eps : float, optional - used to determine how close you can be to a specific time - (expressed in days since 1-1-1850) and still be considered the - same time (default is 30 minutes) - window : float, optional - specify to extend the averaging intervals (in days since - 1-1-1850) when a variable must be coarsened (default is 0) - - Returns - ------- - ref : ILAMB.Variable.Variable - the modified reference variable object - com : ILAMB.Variable.Variable - the modified comparison variable object - - """ - # Process keywords - clip_ref = keywords.get("clip_ref" ,False) - mask_ref = keywords.get("mask_ref" ,False) - eps = keywords.get("eps" ,30./60./24.) - window = keywords.get("window" ,0.) - extents = keywords.get("extents" ,np.asarray([[-90.,+90.],[-180.,+180.]])) - logstring = keywords.get("logstring","") - - # If one variable is temporal, then they both must be - if ref.temporal != com.temporal: - msg = "%s Datasets are not uniformly temporal: " % logstring - msg += "reference = %s, comparison = %s" % (ref.temporal,com.temporal) - logger.debug(msg) - raise VarsNotComparable() - - # If the reference is spatial, the comparison must be - if ref.spatial and not com.spatial: - ref = ref.extractDatasites(com.lat,com.lon) - msg = "%s The reference dataset is spatial but the comparison is site-based. " % logstring - msg += "Extracted %s sites from the reference to match the comparison." % ref.ndata - logger.info(msg) - - # If the reference is layered, the comparison must be - if ref.layered and not com.layered: - if ref.depth.size == 1: - com.layered = True - com.depth = ref.depth - com.depth_bnds = ref.depth_bnds - shp = list(com.data.shape) - insert = 0 - if com.temporal: insert = 1 - shp.insert(insert,1) - com.data = com.data.reshape(shp) - else: - msg = "%s Datasets are not uniformly layered: " % logstring - msg += "reference = %s, comparison = %s" % (ref.layered,com.layered) - logger.debug(msg) - raise NotLayeredVariable() - - # If the reference represents observation sites, extract them from - # the comparison - if ref.ndata is not None and com.spatial: com = com.extractDatasites(ref.lat,ref.lon) - - # If both variables represent observations sites, make sure you - # have the same number of sites and that they represent the same - # location. Note this is after the above extraction so at this - # point the ndata field of both variables should be equal. 
- if ref.ndata != com.ndata: - msg = "%s One or both datasets are understood as site data but differ in number of sites: " % logstring - msg += "reference = %d, comparison = %d" % (ref.ndata,com.ndata) - logger.debug(msg) - raise VarsNotComparable() - if ref.ndata is not None: - if not (np.allclose(ref.lat,com.lat) or np.allclose(ref.lon,com.lon)): - msg = "%s Datasets represent sites, but the locations are different: " % logstring - msg += "maximum difference lat = %.f, lon = %.f" % (np.abs((ref.lat-com.lat)).max(), - np.abs((ref.lon-com.lon)).max()) - logger.debug(msg) - raise VarsNotComparable() - - # If the datasets are both spatial, ensure that both represent the - # same spatial area and trim the datasets if not. - if ref.spatial and com.spatial: - - lat_bnds = (max(ref.lat_bnds[ 0,0],com.lat_bnds[ 0,0],extents[0,0]), - min(ref.lat_bnds[-1,1],com.lat_bnds[-1,1],extents[0,1])) - lon_bnds = (max(ref.lon_bnds[ 0,0],com.lon_bnds[ 0,0],extents[1,0]), - min(ref.lon_bnds[-1,1],com.lon_bnds[-1,1],extents[1,1])) - - # Clip reference - diff = np.abs([ref.lat_bnds[[0,-1],[0,1]]-lat_bnds, - ref.lon_bnds[[0,-1],[0,1]]-lon_bnds]) - if diff.sum() >= 5.: - shp0 = np.asarray(np.copy(ref.data.shape),dtype=int) - ref.trim(lat=lat_bnds,lon=lon_bnds) - shp = np.asarray(np.copy(ref.data.shape),dtype=int) - msg = "%s Spatial data was clipped from the reference: " % logstring - msg += " before: %s" % (shp0) - msg += " after: %s" % (shp ) - logger.info(msg) - - # Clip comparison - diff = np.abs([com.lat_bnds[[0,-1],[0,1]]-lat_bnds, - com.lon_bnds[[0,-1],[0,1]]-lon_bnds]) - if diff.sum() >= 5.: - shp0 = np.asarray(np.copy(com.data.shape),dtype=int) - com.trim(lat=lat_bnds,lon=lon_bnds) - shp = np.asarray(np.copy(com.data.shape),dtype=int) - msg = "%s Spatial data was clipped from the comparison: " % logstring - msg += " before: %s" % (shp0) - msg += " after: %s" % (shp ) - logger.info(msg) - - - if ref.temporal: - - # If the reference time scale is significantly larger than the - # comparison, coarsen the comparison - if np.log10(ref.dt/com.dt) > 0.5: - com = com.coarsenInTime(ref.time_bnds,window=window) - - # Time bounds of the reference dataset - t0 = ref.time_bnds[ 0,0] - tf = ref.time_bnds[-1,1] - - # Find the comparison time range which fully encompasses the reference - com = ClipTime(com,t0,tf) - - if clip_ref: - - # We will clip the reference dataset too - t0 = max(t0,com.time_bnds[ 0,0]) - tf = min(tf,com.time_bnds[-1,1]) - ref = ClipTime(ref,t0,tf) - - else: - - # The comparison dataset needs to fully cover the reference in time - if (com.time_bnds[ 0,0] > (t0+eps) or - com.time_bnds[-1,1] < (tf-eps)): - msg = "%s Comparison dataset does not cover the time frame of the reference: " % logstring - msg += " t0: %.16e <= %.16e (%s)" % (com.time_bnds[0, 0],t0+eps,com.time_bnds[0, 0] <= (t0+eps)) - msg += " tf: %.16e >= %.16e (%s)" % (com.time_bnds[1,-1],tf-eps,com.time_bnds[1,-1] >= (tf-eps)) - logger.debug(msg) - raise VarsNotComparable() - - # Check that we now are on the same time intervals - if ref.time.size != com.time.size: - msg = "%s Datasets have differing numbers of time intervals: " % logstring - msg += "reference = %d, comparison = %d" % (ref.time.size,com.time.size) - logger.debug(msg) - raise VarsNotComparable() - if not np.allclose(ref.time_bnds,com.time_bnds,atol=0.75*ref.dt): - msg = "%s Datasets are defined at different times" % logstring - logger.debug(msg) - raise VarsNotComparable() - - if ref.layered: - - # Try to resolve if the layers from the two quantities are - # different - if 
ref.depth.size == com.depth.size == 1: - ref = ref.integrateInDepth(mean = True) - com = com.integrateInDepth(mean = True) - elif ref.depth.size != com.depth.size: - # Compute the mean values from the comparison over the - # layer breaks of the reference. - if ref.depth.size == 1 and com.depth.size > 1: - com = com.integrateInDepth(z0=ref.depth_bnds[ 0,0], - zf=ref.depth_bnds[-1,1], - mean = True) - ref = ref.integrateInDepth(mean = True) # just removing the depth dimension - else: - if not np.allclose(ref.depth,com.depth): - msg = "%s Datasets have a different layering scheme" % logstring - logger.debug(msg) - raise VarsNotComparable() - - # Apply the reference mask to the comparison dataset and - # optionally vice-versa - if not ref.layered: - mask = ref.interpolate(time=com.time,lat=com.lat,lon=com.lon) - com.data.mask += mask.data.mask - if mask_ref: - mask = com.interpolate(time=ref.time,lat=ref.lat,lon=ref.lon) - ref.data.mask += mask.data.mask - - # Convert the comparison to the units of the reference - com = com.convert(ref.unit) - - return ref,com - - -def CombineVariables(V): - """Combines a list of variables into a single variable. - - This routine is intended to be used to merge variables when - separate moments in time are scattered over several files. - - Parameters - ---------- - V : list of ILAMB.Variable.Variable - a list of variables to merge into a single variable - - Returns - ------- - v : ILAMB.Variable.Variable - the merged variable - """ - from Variable import Variable - - # checks on data - assert type(V) == type([]) - for v in V: assert v.temporal - if len(V) == 1: return V[0] - - # Put list in order by initial time - V.sort(key=lambda v: v.time[0]) - - # Check the beginning and ends times for monotonicity - nV = len(V) - t0 = np.zeros(nV) - tf = np.zeros(nV) - nt = np.zeros(nV,dtype=int) - ind = [0] - for i,v in enumerate(V): - t0[i] = v.time[ 0] - tf[i] = v.time[-1] - nt[i] = v.time.size - ind.append(nt[:(i+1)].sum()) - - # Checks on monotonicity - assert (t0[1:]-t0[:-1]).min() >= 0 - assert (tf[1:]-tf[:-1]).min() >= 0 - assert (t0[1:]-tf[:-1]).min() >= 0 - - # Assemble the data - shp = (nt.sum(),)+V[0].data.shape[1:] - time = np.zeros(shp[0]) - time_bnds = np.zeros((shp[0],2)) - data = np.zeros(shp) - mask = np.zeros(shp,dtype=bool) - for i,v in enumerate(V): - time [ind[i]:ind[i+1]] = v.time - time_bnds[ind[i]:ind[i+1],...] = v.time_bnds - data [ind[i]:ind[i+1],...] = v.data - mask [ind[i]:ind[i+1],...] = v.data.mask - - # If assembled from single slice files and no time bounds were - # provided, they will not be reflective of true bounds here. If - # any dt's are 0, make time_bounds none and recompute in the - # constructor. - if np.any((time_bnds[:,1]-time_bnds[:,0])<1e-12): time_bnds = None - - v = V[0] - return Variable(data = np.ma.masked_array(data,mask=mask), - unit = v.unit, - name = v.name, - time = time, - time_bnds = time_bnds, - depth = v.depth, - depth_bnds = v.depth_bnds, - lat = v.lat, - lon = v.lon, - area = v.area, - ndata = v.ndata) - -def ConvertBoundsTypes(x): - y = None - if x.ndim == 2: - y = np.zeros(x.shape[0]+1) - y[:-1] = x[ :, 0] - y[ -1] = x[-1,-1] - if x.ndim == 1: - y = np.zeros((x.shape[0]-1,2)) - y[:,0] = x[:-1] - y[:,1] = x[+1:] - return y - -def LandLinInterMissingValues(mdata): - land = np.any(mdata.mask,axis=0)==False - data = np.ma.masked_array(mdata) - data.data[data.mask] = 0. - data.fill_value = 0. - data = data.data - land = land.astype(int) - smooth = data*land[np.newaxis,...] 
- suml = np.copy(land) - smooth[:,1:-1,1:-1] += data[:, :-2, :-2]*land[np.newaxis, :-2, :-2] - suml [ 1:-1,1:-1] += land[ :-2, :-2] - smooth[:,1:-1,1:-1] += data[:, :-2,1:-1]*land[np.newaxis, :-2,1:-1] - suml [ 1:-1,1:-1] += land[ :-2,1:-1] - smooth[:,1:-1,1:-1] += data[:, :-2, +2:]*land[np.newaxis, :-2, +2:] - suml [ 1:-1,1:-1] += land[ :-2, +2:] - smooth[:,1:-1,1:-1] += data[:,1:-1, :-2]*land[np.newaxis,1:-1, :-2] - suml [ 1:-1,1:-1] += land[ 1:-1, :-2] - smooth[:,1:-1,1:-1] += data[:,1:-1, +2:]*land[np.newaxis,1:-1, +2:] - suml [ 1:-1,1:-1] += land[ 1:-1, +2:] - smooth[:,1:-1,1:-1] += data[:, +2:, :-2]*land[np.newaxis, +2:, :-2] - suml [ 1:-1,1:-1] += land[ +2:, :-2] - smooth[:,1:-1,1:-1] += data[:, +2:,1:-1]*land[np.newaxis, +2:,1:-1] - suml [ 1:-1,1:-1] += land[ +2:,1:-1] - smooth[:,1:-1,1:-1] += data[:, +2:, +2:]*land[np.newaxis, +2:, +2:] - suml [ 1:-1,1:-1] += land[ +2:, +2:] - smooth /= suml.clip(1) - smooth = (mdata.mask==True)*smooth + (mdata.mask==False)*mdata.data - return smooth diff --git a/ilamb/ilamb/test/makefile b/ilamb/ilamb/test/makefile deleted file mode 100644 index a777a097..00000000 --- a/ilamb/ilamb/test/makefile +++ /dev/null @@ -1,15 +0,0 @@ -NP = 1 - -test_sample: - @mpirun -n ${NP} ilamb-run \ - --config test.cfg \ - --model_root ${ILAMB_ROOT}/MODELS/CLM/ \ - --models CLM50r243CRUNCEP CLM50r243GSWP3 \ - --regions global shsa - python score_diff.py scores_test.csv.gold _build/scores.csv - -test_run_script: - nosetests test_run_script.py - -clean: - @rm -rf _build diff --git a/ilamb/ilamb/test/score_diff.py b/ilamb/ilamb/test/score_diff.py deleted file mode 100644 index 7ec0c82a..00000000 --- a/ilamb/ilamb/test/score_diff.py +++ /dev/null @@ -1,18 +0,0 @@ -import numpy as np -import sys -if len(sys.argv) != 3: - print "usage: python score_diff.py scores1.csv scores2.csv" - sys.exit(1) -gold = np.recfromcsv(sys.argv[1]) -test = np.recfromcsv(sys.argv[2]) -assert gold.dtype == test.dtype -ok = True -for model in gold.dtype.names[1:]: - if not np.allclose(test[model],gold[model]): - ok = False - diff = np.abs(test[model]-gold[model])/gold[model] - for i in range(diff.size): - if diff[i] > 1e-12: - print "%s | %s | %.6f%% " % (gold['variables'][i],model,diff[i]*100.) 
-if not ok: sys.exit(1) -print "Test passed" diff --git a/ilamb/ilamb/test/scores_test.csv.gold b/ilamb/ilamb/test/scores_test.csv.gold deleted file mode 100644 index 6fccfb93..00000000 --- a/ilamb/ilamb/test/scores_test.csv.gold +++ /dev/null @@ -1,9 +0,0 @@ -Variables,CLM50r243CRUNCEP,CLM50r243GSWP3 -Biomass,0.5957104653413856,0.6783045750117078 -Gross Primary Productivity,0.6217211297637607,0.6126273585798891 -Global Net Ecosystem Carbon Balance,0.7054000637266042,0.8636690794621101 -Net Ecosystem Exchange,0.3941918077804778,0.38120476926634617 -Terrestrial Water Storage Anomaly,0.7000653021257858,0.7269702240175762 -Albedo,0.5434663466148166,0.544587485316599 -Surface Air Temperature,0.9256731031865132,0.9314748385926337 -Precipitation,0.7555153501937276,0.7679655805094326 diff --git a/ilamb/ilamb/test/test.cfg b/ilamb/ilamb/test/test.cfg deleted file mode 100644 index 57446284..00000000 --- a/ilamb/ilamb/test/test.cfg +++ /dev/null @@ -1,134 +0,0 @@ - -[h1: Ecosystem and Carbon Cycle] -bgcolor = "#ECFFE6" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Biomass] -variable = "biomass" -alternate_vars = "cVeg" -weight = 5 -skip_rmse = True -mass_weighting = True - -[GlobalCarbon] -source = "DATA/biomass/GLOBAL.CARBON/biomass_0.5x0.5.nc" -weight = 16 -table_unit = "Pg" -plot_unit = "kg m-2" -space_mean = False - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Gross Primary Productivity] -variable = "gpp" -cmap = "Greens" -weight = 5 -mass_weighting = True - -[Fluxnet] -source = "DATA/gpp/FLUXNET/gpp.nc" -weight = 9 -table_unit = "g m-2 d-1" -plot_unit = "g m-2 d-1" - -[GBAF] -source = "DATA/gpp/GBAF/gpp_0.5x0.5.nc" -weight = 15 -table_unit = "Pg yr-1" -plot_unit = "g m-2 d-1" -space_mean = False -skip_iav = True -relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Global Net Ecosystem Carbon Balance] -variable = "nbp" -weight = 5 -ctype = "ConfNBP" - -[Hoffman] -source = "DATA/nbp/HOFFMAN/nbp_1850-2010.nc" -weight = 20 - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Net Ecosystem Exchange] -variable = "nee" -derived = "gpp-ra-rh" -weight = 5 -mass_weighting = True - -[GBAF] -source = "DATA/nee/GBAF/nee_0.5x0.5.nc" -weight = 4 -table_unit = "Pg yr-1" -plot_unit = "g m-2 d-1" -space_mean = False -skip_iav = True - -########################################################################### - -[h1: Hydrology Cycle] -bgcolor = "#E6F9FF" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Terrestrial Water Storage Anomaly] -variable = "twsa" -alternate_vars = "tws" -cmap = "Blues" -weight = 5 -ctype = "ConfTWSA" - -[GRACE] -source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" -weight = 25 - -########################################################################### - -[h1: Radiation and Energy Cycle] -bgcolor = "#FFECE6" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Albedo] -variable = "albedo" -derived = "rsus/rsds" -weight = 1 - -[MODIS] -source = "DATA/albedo/MODIS/albedo_0.5x0.5.nc" -weight = 20 - -########################################################################### - -[h1: Forcings] -bgcolor = "#EDEDED" - -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Surface Air Temperature] -variable = "tas" -weight = 2 - -[CRU] -source = "DATA/tas/CRU/tas_0.5x0.5.nc" -weight = 25 
- -#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -[h2: Precipitation] -variable = "pr" -cmap = "Blues" -weight = 2 -mass_weighting = True - -[GPCP2] -source = "DATA/pr/GPCP2/pr_0.5x0.5.nc" -land = True -weight = 20 -table_unit = "mm d-1" -plot_unit = "mm d-1" -space_mean = True diff --git a/ilamb/ilamb/test/test_Variable.py b/ilamb/ilamb/test/test_Variable.py deleted file mode 100644 index dd01d319..00000000 --- a/ilamb/ilamb/test/test_Variable.py +++ /dev/null @@ -1,157 +0,0 @@ -from ILAMB.Variable import Variable -import ILAMB.ilamblib as il -import numpy as np -import os - -def test_integrateInTime(variables): - head = "\n--- Testing integrateInTime() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - try: - vdict["timeint"] = var.integrateInTime() - vdict["timeint_mean"] = var.integrateInTime(mean=True) - print vdict["timeint"] - print vdict["timeint_mean"] - except il.NotTemporalVariable: - pass - -def test_integrateInSpace(variables): - head = "\n--- Testing integrateInSpace() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - try: - vdict["spaceint"] = var.integrateInSpace() - vdict["spaceint_mean"] = var.integrateInSpace(mean=True) - vdict["spaceint_amazon"] = var.integrateInSpace(region="amazon") - vdict["spaceint_amazon_mean"] = var.integrateInSpace(region="amazon",mean=True) - print vdict["spaceint"] - print vdict["spaceint_mean"] - print vdict["spaceint_amazon"] - print vdict["spaceint_amazon_mean"] - except il.NotSpatialVariable: - pass - -def test_annualCycle(variables): - head = "\n--- Testing annualCycle() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - try: - # note: not testing std, max, and min (assuming ok since functions are similar) - vdict["cycle"],junk,junk,junk = var.annualCycle() - print vdict["cycle"] - except il.NotTemporalVariable: - pass - -def test_timeOfExtrema(variables): - head = "\n--- Testing timeOfExtrema() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - try: - # note: not testing min time (assuming ok since functions are similar) - vdict["maxtime"] = var.timeOfExtrema() - print vdict["maxtime"] - except il.NotTemporalVariable: - pass - -def test_interpolate(variables): - head = "\n--- Testing interpolate() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - if var.temporal: - t = np.linspace(var.time.min(),var.time.max(),10) - vdict["interp1"] = var.interpolate(time=t) - if var.spatial: - lat = np.linspace(- 90, 90,30) - lon = np.linspace(-180,180,60) - vdict["interp2"] = var.interpolate(lat=lat,lon=lon) - vdict["interp3"] = var.interpolate(lat=lat) - vdict["interp4"] = var.interpolate(lon=lon) - if var.temporal and var.spatial: - vdict["interp5"] = var.interpolate(time=t,lat=lat,lon=lon) - vdict["interp6"] = var.interpolate(time=t,lat=lat) - vdict["interp7"] = var.interpolate(time=t,lon=lon) - for key in vdict.keys(): - if "interp" in key: print vdict[key] - -def test_phaseShift(variables): - head = "\n--- Testing phaseShift() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - try: - vdict["shift"] = var.phaseShift(var) - vdict["shift_min"] = var.phaseShift(var,method="min_of_annual_cycle") - print vdict["shift"] - print vdict["shift_min"] - if vdict.has_key("cycle"): - vdict["shift_fast"] = vdict["cycle"].phaseShift(vdict["cycle"]) - print 
vdict["shift_fast"] - except il.NotTemporalVariable: - pass - -def test_correlation(variables): - head = "\n--- Testing correlation() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - try: - if var.spatial or var.ndata: - vdict["corr_spatial"] = var.correlation(var,"spatial") - print vdict["corr_spatial"] - if var.temporal: - vdict["corr_temporal"] = var.correlation(var,"temporal") - print vdict["corr_temporal"] - if var.spatial and var.temporal: - vdict["corr_both"] = var.correlation(var,"spatiotemporal") - print vdict["corr_both"] - - except il.NotTemporalVariable: - pass - -def test_bias(variables): - head = "\n--- Testing bias() " - print "%s%s\n" % (head,"-"*(120-len(head))) - for vdict in variables: - var = vdict["var"] - try: - vdict["bias"] = var.bias(var) - print vdict["bias"] - except il.NotSpatialVariable: - pass - -# Setup different types of variables -gpp = {} -gpp["var"] = Variable(filename = os.environ["ILAMB_ROOT"]+"/DATA/gpp/FLUXNET-MTE/derived/gpp.nc", - variable_name = "gpp") -le = {} -le["var"] = Variable(filename = os.environ["ILAMB_ROOT"]+"/DATA/le/FLUXNET/derived/le.nc", - variable_name = "le") -co2 = {} -co2["var"] = Variable(filename = os.environ["ILAMB_ROOT"]+"/DATA/co2/MAUNA.LOA/derived/co2_1959-2013.nc", - variable_name = "co2") -pi = {} -pi["var"] = Variable(data = np.pi, - unit = "-", - name = "pi") - -variables = [gpp,le,co2,pi] - -head = "\n--- Found the following variables for testing " -print "%s%s\n" % (head,"-"*(120-len(head))) -for vdict in variables: - print vdict["var"] - -test_integrateInTime(variables) -test_integrateInSpace(variables) -test_annualCycle(variables) -test_timeOfExtrema(variables) -test_interpolate(variables) -test_phaseShift(variables) -test_correlation(variables) -test_bias(variables) - diff --git a/ilamb/ilamb/test/test_run_script.py b/ilamb/ilamb/test/test_run_script.py deleted file mode 100644 index 46bd6bc8..00000000 --- a/ilamb/ilamb/test/test_run_script.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Nosetests for the ILAMB run script.""" -import os -import subprocess -from nose.tools import assert_equal, raises - - -run_cmd = 'ilamb-run' - - -def test_help_argument(): - r = subprocess.call([run_cmd, '--help']) - assert_equal(r, 0) - - -@raises(subprocess.CalledProcessError) -def test_config_argument_not_set(): - r = subprocess.check_call([run_cmd]) From bcae91828bb5bfd8dcb016816ddde2d91f45353b Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Thu, 16 Aug 2018 16:50:14 -0600 Subject: [PATCH 17/22] Squashed 'ilamb/ilamb/' content from commit 5f35ec3 git-subtree-dir: ilamb/ilamb git-subtree-split: 5f35ec3e07f7ea8441429ea0c3e864d1aa8340e1 --- .gitignore | 15 + README.rst | 70 ++ bin/ilamb-doctor | 134 +++ bin/ilamb-fetch | 97 ++ bin/ilamb-mean | 177 ++++ bin/ilamb-run | 574 +++++++++++ bin/ilamb-table | 65 ++ demo/diurnal.cfg | 76 ++ demo/ilamb.cfg | 551 ++++++++++ demo/iomb.cfg | 65 ++ demo/sample.cfg | 18 + doc/Makefile | 186 ++++ doc/_templates/class.rst | 25 + doc/add_data.rst | 150 +++ doc/add_model.rst | 117 +++ doc/conf.py | 357 +++++++ doc/confront.rst | 294 ++++++ doc/custom_regions.rst | 124 +++ doc/first_steps.rst | 233 +++++ doc/format_data.rst | 220 ++++ doc/ilamb_doctor.rst | 61 ++ doc/ilamb_fetch.rst | 37 + doc/ilamb_run.rst | 177 ++++ doc/index.rst | 39 + doc/install.rst | 278 +++++ doc/overview.rst | 203 ++++ doc/packageapi.rst | 132 +++ doc/tutorial.rst | 39 + setup.py | 109 ++ src/ILAMB/ConfDiurnal.py | 199 ++++ src/ILAMB/ConfEvapFraction.py | 65 ++ src/ILAMB/ConfIOMB.py | 
628 ++++++++++++ src/ILAMB/ConfNBP.py | 198 ++++ src/ILAMB/ConfPermafrost.py | 223 ++++ src/ILAMB/ConfRunoff.py | 215 ++++ src/ILAMB/ConfTWSA.py | 247 +++++ src/ILAMB/Confrontation.py | 1105 ++++++++++++++++++++ src/ILAMB/ModelResult.py | 346 +++++++ src/ILAMB/Post.py | 1218 ++++++++++++++++++++++ src/ILAMB/Regions.py | 194 ++++ src/ILAMB/Scoreboard.py | 699 +++++++++++++ src/ILAMB/Variable.py | 1746 ++++++++++++++++++++++++++++++++ src/ILAMB/__init__.py | 35 + src/ILAMB/constants.py | 224 ++++ src/ILAMB/ilamblib.py | 1794 +++++++++++++++++++++++++++++++++ test/makefile | 15 + test/score_diff.py | 18 + test/scores_test.csv.gold | 9 + test/test.cfg | 134 +++ test/test_Variable.py | 157 +++ test/test_run_script.py | 17 + 51 files changed, 14109 insertions(+) create mode 100644 .gitignore create mode 100644 README.rst create mode 100644 bin/ilamb-doctor create mode 100644 bin/ilamb-fetch create mode 100644 bin/ilamb-mean create mode 100644 bin/ilamb-run create mode 100644 bin/ilamb-table create mode 100644 demo/diurnal.cfg create mode 100644 demo/ilamb.cfg create mode 100644 demo/iomb.cfg create mode 100644 demo/sample.cfg create mode 100644 doc/Makefile create mode 100644 doc/_templates/class.rst create mode 100644 doc/add_data.rst create mode 100644 doc/add_model.rst create mode 100644 doc/conf.py create mode 100644 doc/confront.rst create mode 100644 doc/custom_regions.rst create mode 100644 doc/first_steps.rst create mode 100644 doc/format_data.rst create mode 100644 doc/ilamb_doctor.rst create mode 100644 doc/ilamb_fetch.rst create mode 100644 doc/ilamb_run.rst create mode 100644 doc/index.rst create mode 100644 doc/install.rst create mode 100644 doc/overview.rst create mode 100644 doc/packageapi.rst create mode 100644 doc/tutorial.rst create mode 100644 setup.py create mode 100644 src/ILAMB/ConfDiurnal.py create mode 100644 src/ILAMB/ConfEvapFraction.py create mode 100644 src/ILAMB/ConfIOMB.py create mode 100644 src/ILAMB/ConfNBP.py create mode 100644 src/ILAMB/ConfPermafrost.py create mode 100644 src/ILAMB/ConfRunoff.py create mode 100644 src/ILAMB/ConfTWSA.py create mode 100644 src/ILAMB/Confrontation.py create mode 100644 src/ILAMB/ModelResult.py create mode 100644 src/ILAMB/Post.py create mode 100644 src/ILAMB/Regions.py create mode 100644 src/ILAMB/Scoreboard.py create mode 100644 src/ILAMB/Variable.py create mode 100644 src/ILAMB/__init__.py create mode 100644 src/ILAMB/constants.py create mode 100644 src/ILAMB/ilamblib.py create mode 100644 test/makefile create mode 100644 test/score_diff.py create mode 100644 test/scores_test.csv.gold create mode 100644 test/test.cfg create mode 100644 test/test_Variable.py create mode 100644 test/test_run_script.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..e1b1aa8a --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +.gitignore +RDict.db +RDict.log +*.pyc +*~ +*.orig +#*# +*# +*.DS_Store +*build* +*graveyard* +*.egg-info +dist +*_generated* +src/ILAMB/generated_version.py \ No newline at end of file diff --git a/README.rst b/README.rst new file mode 100644 index 00000000..ebbe75ee --- /dev/null +++ b/README.rst @@ -0,0 +1,70 @@ +The ILAMB Benchmarking System +============================= + +The International Land Model Benchmarking (ILAMB) project is a +model-data intercomparison and integration project designed to improve +the performance of land models and, in parallel, improve the design of +new measurement campaigns to reduce uncertainties associated with key +land surface processes. 
Building upon past model evaluation studies,
+the goals of ILAMB are to:
+
+* develop internationally accepted benchmarks for land model
+  performance, promote the use of these benchmarks by the
+  international community for model intercomparison,
+* strengthen linkages between experimental, remote sensing, and
+  climate modeling communities in the design of new model tests and
+  new measurement programs, and
+* support the design and development of a new, open source,
+  benchmarking software system for use by the international community.
+
+It is the last of these goals with which this repository is
+concerned. We have developed a python-based generic benchmarking
+system, initially focused on assessing land model performance.
+
+Useful Information
+------------------
+
+* `Documentation `_ - installation and
+  basic usage tutorials
+* Sample Output
+
+  * `CLM `_ - land comparison against 3 CLM versions and 2 forcings
+  * `CMIP5 `_ - land comparison against a collection of CMIP5 models
+  * `IOMB `_ - ocean comparison against a few ocean models
+
+* Paper `preprint `_ which
+  details the design and methodology employed in the ILAMB package
+* If you find the package or the output helpful in your research or
+  development efforts, we kindly ask you to cite the following
+  reference (DOI:10.18139/ILAMB.v002.00/1251621).
+
+ILAMB 2.3 Release
+-----------------
+
+We are pleased to announce version 2.3 of the ILAMB python
+package. Among many bugfixes and improvements, we highlight these major
+changes:
+
+* You may observe a large shift in some score values. In this version
+  we solidified our scoring methodology while writing a `paper
+  `_ which necessitated
+  reworking some of the scores. For details, see the linked paper.
+* Made a memory optimization pass through the analysis routines. Peak
+  memory usage and the time at peak were reduced, improving performance.
+* Restructured the symbolic manipulation of derived variables to
+  greatly reduce the required memory.
+* Moved from using cfunits to cf_units. Both are python wrappers
+  around the UDUNITS library, but cfunits is stagnant and placed a
+  lower limit on the version of the netCDF4 python wrappers we could
+  use.
+* The scoring of the interannual variability was missed in the port
+  from version 1 to 2; we have added the metric.
+* The terrestrial water storage anomaly GRACE metric was changed to
+  compare mean anomaly values over large river basins. For details, see
+  the ILAMB paper.
+
+
+Funding
+-------
+
+This research was performed for the *Reducing Uncertainties in Biogeochemical Interactions through Synthesis and Computation* (RUBISCO) Scientific Focus Area, which is sponsored by the Regional and Global Climate Modeling (RGCM) Program in the Climate and Environmental Sciences Division (CESD) of the Biological and Environmental Research (BER) Program in the U.S. Department of Energy Office of Science.
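The release notes above mention the move from cfunits to cf_units. As a rough illustration of the cf_units interface, and not the exact calls made inside the ILAMB package, a unit conversion of the kind driven by unit strings in the configuration files looks roughly like this::

    # Minimal sketch, assuming only that numpy and cf_units are installed;
    # the variable names are illustrative, not ILAMB internals.
    import numpy as np
    from cf_units import Unit

    src = Unit("g m-2 d-1")           # unit string as it appears in the config files
    dst = Unit("kg m-2 s-1")
    data = np.array([1.0, 2.5, 4.0])  # hypothetical flux values

    if src.is_convertible(dst):
        print(src.convert(data, dst))
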
diff --git a/bin/ilamb-doctor b/bin/ilamb-doctor new file mode 100644 index 00000000..cceaa17e --- /dev/null +++ b/bin/ilamb-doctor @@ -0,0 +1,134 @@ +#!/usr/bin/env python +""" +""" +from ILAMB.ModelResult import ModelResult +from ILAMB.Scoreboard import Scoreboard +from ILAMB.Regions import Regions +from ILAMB import ilamblib as il +import os,time,sys,argparse +import numpy as np +import datetime,glob + +# Some color constants for printing to the terminal +OK = '\033[92m' +FAIL = '\033[91m' +ENDC = '\033[0m' + +def InitializeModels(model_root,models=[],verbose=False,filter="",model_year=[]): + """Initializes a list of models + + Initializes a list of models where each model is the subdirectory + beneath the given model root directory. The global list of models + will exist on each processor. + + Parameters + ---------- + model_root : str + the directory whose subdirectories will become the model results + models : list of str, optional + only initialize a model whose name is in this list + verbose : bool, optional + enable to print information to the screen + model_year : 2-tuple, optional + shift model years from the first to the second part of the tuple + + Returns + ------- + M : list of ILAMB.ModelResults.ModelsResults + a list of the model results, sorted alphabetically by name + + """ + # initialize the models + M = [] + if len(model_year) != 2: model_year = None + max_model_name_len = 0 + if verbose: print "\nSearching for model results in %s\n" % model_root + for subdir, dirs, files in os.walk(model_root): + for mname in dirs: + if len(models) > 0 and mname not in models: continue + M.append(ModelResult(os.path.join(subdir,mname), modelname = mname, filter=filter, model_year = model_year)) + max_model_name_len = max(max_model_name_len,len(mname)) + break + M = sorted(M,key=lambda m: m.name.upper()) + + # assign unique colors + clrs = il.GenerateDistinctColors(len(M)) + for m in M: + clr = clrs.pop(0) + m.color = clr + + # optionally output models which were found + if verbose: + for m in M: + print (" {0:>45}").format(m.name) + + if len(M) == 0: + if verbose: print "No model results found" + sys.exit(1) + + return M + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--model_root', dest="model_root", metavar='root', type=str, nargs=1, default=["./"], + help='root at which to search for models') +parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, + help='path to configuration file to use') +parser.add_argument('--models', dest="models", metavar='m', type=str, nargs='+',default=[], + help='specify which models to run, list model names with no quotes and only separated by a space.') +parser.add_argument('--model_year', dest="model_year", metavar='y0 yf', type=int, nargs='+',default=[], + help='set to shift model years, "--model_year y0 yf" will shift years from y0 to yf') +parser.add_argument('--confrontations', dest="confront", metavar='c', type=str, nargs='+',default=[], + help='specify which confrontations to run, list confrontation names with no quotes and only separated by a space.') +parser.add_argument('-q','--quiet', dest="quiet", action="store_true", + help='enable to silence screen output') +parser.add_argument('--filter', dest="filter", metavar='filter', type=str, nargs=1, default=[""], + help='a string which much be in the model filenames') +parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], + help='path of where to save the output') + +args = 
parser.parse_args() +if args.config is None: + print "\nError: You must specify a configuration file using the option --config\n" + sys.exit(1) + +M = InitializeModels(args.model_root[0],args.models,not args.quiet,filter=args.filter[0],model_year=args.model_year) +S = Scoreboard(args.config[0], + verbose = False, + build_dir = args.build_dir[0]) + +max_name_len = 45 +max_m_len = 0 +for m in M: max_m_len = max(max_m_len,len(m.name)) + +print """ +We will now look in each model for the variables in the ILAMB +configure file you specified (%s). The color green is used to reflect +which variables were found in the model. The color red is used to +reflect that a model is missing a required variable.\n""" % (args.config[0]) +for m in M: + for c in S.list(): + ands,ors = c.requires() + ok = False + if len(ands) == 0: + tf = [m.variables.has_key(v) for v in ors] + ok = any(tf) + out = [("\033[92m%s\033[0m" % v) if t else v for v,t in zip(ors,tf)] + if len(out) > 1: out[-1] = "or %s" % out[-1] + if len(out) <= 2: + out = " " .join(out) + else: + out = ", ".join(out) + else: + tf = [m.variables.has_key(v) for v in ands] + ok = all(tf) + out = [("\033[92m%s\033[0m" % v) if t else ("\033[91m%s\033[0m" % v) for v,t in zip(ands,tf)] + if len(out) > 1: out[-1] = "and %s" % out[-1] + if len(out) <= 2: + out = " " .join(out) + else: + out = ", ".join(out) + + if ok: + print (" {0:>%d}\033[92m {1:<%d}\033[0m %s" % (max_name_len,max_m_len,out)).format(c.longname,m.name) + else: + print (" {0:>%d}\033[91m {1:<%d}\033[0m %s" % (max_name_len,max_m_len,out)).format(c.longname,m.name) diff --git a/bin/ilamb-fetch b/bin/ilamb-fetch new file mode 100644 index 00000000..9ad1c781 --- /dev/null +++ b/bin/ilamb-fetch @@ -0,0 +1,97 @@ +#!/usr/bin/env python +import hashlib,argparse,os,sys,urllib + +def BuildDirectories(filepath): + d = os.path.dirname(filepath) + if not os.path.isdir(d): + os.makedirs(d) + +def filehash(filepath): + blocksize = 64*1024 + sha = hashlib.sha1() + with open(filepath, 'rb') as fp: + while True: + data = fp.read(blocksize) + if not data: + break + sha.update(data) + return sha.hexdigest() + +def GenerateSha1sumFile(root,suffix=".nc"): + + lines = "" + for topdir, dirs, files in os.walk(root): + for fpath in [os.path.join(topdir, f) for f in files]: + if not fpath.endswith(suffix): continue + size = os.path.getsize(fpath) + sha = filehash(fpath) + name = os.path.relpath(fpath, root) + lines += '%s %s\n' % (sha,name) + return lines + +def CheckSha1sumFile(sha1sumfile,root): + + needs_updating = [] + with file(sha1sumfile) as f: + lines = f.readlines() + for line in lines: + line = line.split() + sha1sum,filename = line + fpath = os.path.join(root,filename) + if os.path.isfile(fpath): + if sha1sum != filehash(fpath): needs_updating.append(filename) + else: + needs_updating.append(filename) + return needs_updating + +# default value is ILAMB_ROOT if set +local_root = "./" +if os.environ.has_key("ILAMB_ROOT"): local_root = os.environ["ILAMB_ROOT"] + +# parse options +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--local_root', dest="local_root", metavar="PATH", type=str, default=local_root, + help='Location on your system.') +parser.add_argument('--remote_root', dest="remote_root", metavar="PATH", type=str, default="http://ilamb.ornl.gov/ILAMB-Data/", + help='Location on the remote system.') +parser.add_argument('-c', '--create', dest="create", action="store_true", + help='Enable to create a sha1sum check file of the contents of the local root') +args = 
parser.parse_args() + +# use create mode if you want to make a checksum file of a directory +if args.create: + f = file(args.local_root + "/SHA1SUM",mode="w") + f.write(GenerateSha1sumFile(args.local_root)) + f.close() + sys.exit() + +print "\nComparing remote location:\n\n\t%s\n\nTo local location:\n\n\t%s" % (args.remote_root,args.local_root) + +# download and build the sha1sum check files +urllib.urlretrieve(args.remote_root + "/SHA1SUM", + args.local_root + "/SHA1SUM") +if "404 Not Found" in file(args.local_root + "/SHA1SUM").read(): + raise ValueError("Could not find the sha1 sum file: %s" % (args.remote_root + "/SHA1SUM")) +needs_updating = CheckSha1sumFile(args.local_root + "/SHA1SUM",args.local_root) + +if len(needs_updating) == 0: + print "\nAll your data is up-to-date and clean.\n" + os.system("rm -f " + args.local_root + "/SHA1SUM") + sys.exit() + +print "\nI found the following files which are missing, out of date, or corrupt:\n" +for key in needs_updating: + print "\t%s/%s" % (args.local_root,key) + +reply = str(raw_input('\nDownload replacements? [y/n] ')).lower().strip() +if reply[0] == 'y': + print " " + for key in needs_updating: + print "\tDownloading %s/%s..." % (args.remote_root,key) + BuildDirectories(args.local_root + "/" + key) + urllib.urlretrieve(args.remote_root + "/" + key, + args.local_root + "/" + key) + print "\nDownload complete. Rerun ilamb-fetch to check file integrity.\n" + +os.system("rm -f " + args.local_root + "/SHA1SUM") + diff --git a/bin/ilamb-mean b/bin/ilamb-mean new file mode 100644 index 00000000..e3e02681 --- /dev/null +++ b/bin/ilamb-mean @@ -0,0 +1,177 @@ +#!/usr/bin/env python +""" +Computes a multimodel mean. +""" +import os,sys,argparse +from ILAMB.ModelResult import ModelResult +from ILAMB.Variable import Variable +from netCDF4 import Dataset +import numpy as np + +def InitializeModels(model_root,models=[],verbose=False,filter="",model_year=[]): + """Initializes a list of models + + Initializes a list of models where each model is the subdirectory + beneath the given model root directory. The global list of models + will exist on each processor. 
+ + Parameters + ---------- + model_root : str + the directory whose subdirectories will become the model results + models : list of str, optional + only initialize a model whose name is in this list + verbose : bool, optional + enable to print information to the screen + model_year : 2-tuple, optional + shift model years from the first to the second part of the tuple + + Returns + ------- + M : list of ILAMB.ModelResults.ModelsResults + a list of the model results, sorted alphabetically by name + + """ + # initialize the models + M = [] + if len(model_year) != 2: model_year = None + max_model_name_len = 0 + if verbose: print "\nSearching for model results in %s\n" % model_root + for subdir, dirs, files in os.walk(model_root): + for mname in dirs: + if len(models) > 0 and mname not in models: continue + M.append(ModelResult(os.path.join(subdir,mname), modelname = mname, filter=filter, model_year = model_year)) + max_model_name_len = max(max_model_name_len,len(mname)) + break + M = sorted(M,key=lambda m: m.name.upper()) + + # optionally output models which were found + if verbose: + for m in M: + print (" {0:>45}").format(m.name) + + if len(M) == 0: + if verbose: print "No model results found" + sys.exit(0) + + return M + + +def CombineModelVars(var,res=1.0): + """ + + """ + m0 = var.keys()[0] + t0 = -1e20; tf = +1e20 + lat0 = -1e20; latf = +1e20 + lon0 = -1e20; lonf = +1e20 + dep0 = -1e20; depf = +1e20 + for m in var: + var[m].convert(var[m0].unit) + assert var[m].data.ndim == var[m0].data.ndim + if var[m].time is not None: + t0 = max(var[m].time_bnds.min(),t0) + tf = min(var[m].time_bnds.max(),tf) + if var[m].lat is not None: + lat0 = max(var[m].lat_bnds.min(),lat0) + latf = min(var[m].lat_bnds.max(),latf) + if var[m].lon is not None: + lon0 = max(var[m].lon_bnds.min(),lon0) + lonf = min(var[m].lon_bnds.max(),lonf) + if var[m].depth is not None: + dep0 = max(var[m].depth_bnds.min(),dep0) + depf = min(var[m].depth_bnds.max(),depf) + lat0 = max(lat0,- 90.); latf = min(latf,+ 90.) + lon0 = max(lon0,-180.); lonf = min(lonf,+180.) + + # Create space/time grid + var[m0].trim(t=[t0,tf]) + t = np.copy(var[m0].time) + t_bnds = np.copy(var[m0].time_bnds) + lat_bnds = np.arange(lat0,latf+res/2.,res) + lon_bnds = np.arange(lon0,lonf+res/2.,res) + lat = 0.5*(lat_bnds[:-1]+lat_bnds[1:]) + lon = 0.5*(lon_bnds[:-1]+lon_bnds[1:]) + lat_bnd = np.zeros((lat.size,2)) + lon_bnd = np.zeros((lon.size,2)) + lat_bnd[:,0] = lat_bnds[:-1]; lat_bnd[:,1] = lat_bnds[+1:] + lon_bnd[:,0] = lon_bnds[:-1]; lon_bnd[:,1] = lon_bnds[+1:] + + shp = () + if t0 > -1e20: shp += (t .size,) + if lat0 > -1e20: shp += (lat.size,) + if lon0 > -1e20: shp += (lon.size,) + dsum = np.zeros(shp) + count = np.zeros(shp,dtype=int) + + for m in var: + print " Averaging in %s..." 
% m + intv = var[m].interpolate(time=t,lat=lat,lon=lon) + dsum += (intv.data.mask==0)*intv.data + count += (intv.data.mask==0) + + dsum = np.ma.masked_array(dsum,mask=(count==0)) + dsum /= count.clip(1) + return Variable(data = dsum, + unit = var[m0].unit, + name = var[m0].name, + time = t, + time_bnds = t_bnds, + lat = lat, + lat_bnds = lat_bnd, + lon = lon, + lon_bnds = lon_bnd) + +def CreateMeanModel(M,res=1.0): + """ + + """ + def _keep(v): + for keep in ["_bnds","time","lat","lon","layer","depth","lev","areacella"]: + if keep in v: return False + return True + + # Find a list of variables across all models + Vs = [] + for m in M: Vs += [v for v in m.variables.keys() if ((v not in Vs) and (_keep(v)))] + + # Create space/time grid + lat_bnds = np.arange(- 90, 90+res/2.,res) + lon_bnds = np.arange(-180,180+res/2.,res) + lat = 0.5*(lat_bnds[:-1]+lat_bnds[1:]) + lon = 0.5*(lon_bnds[:-1]+lon_bnds[1:]) + lat_bnd = np.zeros((lat.size,2)) + lon_bnd = np.zeros((lon.size,2)) + lat_bnd[:,0] = lat_bnds[:-1]; lat_bnd[:,1] = lat_bnds[+1:] + lon_bnd[:,0] = lon_bnds[:-1]; lon_bnd[:,1] = lon_bnds[+1:] + + for v in Vs: + print v + var = {} + try: + for m in M: + if not m.variables.has_key(v): continue + print " Reading from %s..." % m.name + var[m.name] = m.extractTimeSeries(v) + if len(var) == 1: raise ValueError + mean = CombineModelVars(var) + with Dataset("%s_MeanModel.nc" % v,mode="w") as dset: + mean.toNetCDF4(dset) + print "Writing %s_MeanModel.nc...\n" % v + except: + print "Failed to create %s\n" % v + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--model_root', dest="model_root", metavar='root', type=str, nargs=1, default=["./"], + help='root at which to search for models') +parser.add_argument('--models', dest="models", metavar='m', type=str, nargs='+',default=[], + help='specify which models to run, list model names with no quotes and only separated by a space.') +parser.add_argument('--filter', dest="filter", metavar='filter', type=str, nargs=1, default=[""], + help='a string which much be in the model filenames') +parser.add_argument('-q','--quiet', dest="quiet", action="store_true", + help='enable to silence screen output') + +args = parser.parse_args() + +M = InitializeModels(args.model_root[0],args.models,not args.quiet,filter=args.filter[0]) +CreateMeanModel(M) diff --git a/bin/ilamb-run b/bin/ilamb-run new file mode 100644 index 00000000..03f7b9d5 --- /dev/null +++ b/bin/ilamb-run @@ -0,0 +1,574 @@ +#!/usr/bin/env python +""" +Runs an ILAMB study. +""" +import logging +from ILAMB.ModelResult import ModelResult +from ILAMB.Scoreboard import Scoreboard +from ILAMB.Regions import Regions +from ILAMB import ilamblib as il +from traceback import format_exc +import os,time,sys,argparse +from mpi4py import MPI +import numpy as np +import datetime,glob + +# MPI stuff +comm = MPI.COMM_WORLD +size = comm.Get_size() +rank = comm.Get_rank() +proc = np.zeros(size) + +# Some color constants for printing to the terminal +OK = '\033[92m' +FAIL = '\033[91m' +ENDC = '\033[0m' + +def InitializeModels(model_root,models=[],verbose=False,filter="",model_year=[]): + """Initializes a list of models + + Initializes a list of models where each model is the subdirectory + beneath the given model root directory. The global list of models + will exist on each processor. 
+ + Parameters + ---------- + model_root : str + the directory whose subdirectories will become the model results + models : list of str, optional + only initialize a model whose name is in this list + verbose : bool, optional + enable to print information to the screen + model_year : 2-tuple, optional + shift model years from the first to the second part of the tuple + + Returns + ------- + M : list of ILAMB.ModelResults.ModelsResults + a list of the model results, sorted alphabetically by name + + """ + # initialize the models + M = [] + if len(model_year) != 2: model_year = None + max_model_name_len = 0 + if rank == 0 and verbose: print "\nSearching for model results in %s\n" % model_root + for subdir, dirs, files in os.walk(model_root): + for mname in dirs: + if len(models) > 0 and mname not in models: continue + M.append(ModelResult(os.path.join(subdir,mname), modelname = mname, filter=filter, model_year = model_year)) + max_model_name_len = max(max_model_name_len,len(mname)) + break + M = sorted(M,key=lambda m: m.name.upper()) + + # assign unique colors + clrs = il.GenerateDistinctColors(len(M)) + for m in M: + clr = clrs.pop(0) + m.color = clr + + # optionally output models which were found + if rank == 0 and verbose: + for m in M: + print (" {0:>45}").format(m.name) + + if len(M) == 0: + if verbose and rank == 0: print "No model results found" + comm.Barrier() + comm.Abort(0) + + return M + +def ParseModelSetup(model_setup,models=[],verbose=False,filter=""): + """Initializes a list of models + + Initializes a list of models where each model is the subdirectory + beneath the given model root directory. The global list of models + will exist on each processor. + + Parameters + ---------- + model_setup : str + the directory whose subdirectories will become the model results + models : list of str, optional + only initialize a model whose name is in this list + verbose : bool, optional + enable to print information to the screen + + Returns + ------- + M : list of ILAMB.ModelResults.ModelsResults + a list of the model results, sorted alphabetically by name + + """ + # initialize the models + M = [] + max_model_name_len = 0 + if rank == 0 and verbose: print "\nSetting up model results from %s\n" % model_setup + with file(model_setup) as f: + for line in f.readlines(): + if line.strip().startswith("#"): continue + line = line.split(",") + mname = None + mdir = None + model_year = None + if len(line) >= 2: + mname = line[0].strip() + mdir = line[1].strip() + # if mdir not a directory, then maybe path is relative to ILAMB_ROOT + if not os.path.isdir(mdir): + mdir = os.path.join(os.environ["ILAMB_ROOT"],mdir).strip() + if len(line) == 4: + model_year = [float(line[2].strip()),float(line[3].strip())] + max_model_name_len = max(max_model_name_len,len(mname)) + if (len(models) > 0 and mname not in models) or (mname is None): continue + M.append(ModelResult(mdir, modelname = mname, filter=filter, model_year = model_year)) + + #M = sorted(M,key=lambda m: m.name.upper()) + + # assign unique colors + clrs = il.GenerateDistinctColors(len(M)) + for m in M: + clr = clrs.pop(0) + m.color = clr + + # optionally output models which were found + if rank == 0 and verbose: + for m in M: + print (" {0:>45}").format(m.name) + + if len(M) == 0: + if verbose and rank == 0: print "No model results found" + comm.Barrier() + comm.Abort(0) + + return M + +def InitializeRegions(filenames): + """Initialize regions from a list of files. 
+ + If the file is a netCDF4 file, see documentation in + ILAMB.Regions.addRegionNetCDF4 for details on the required + format. If the file defines regions by latitude/longitude bounds, + then we anticipate comma delimited rows in the following form: + + shortname, longname, min lat, max lat, min lon, max lon + + Note that latitudes should be on (-90,90) and longitudes on + (-180,180). + + Parameters + ---------- + filenames : list of str + a list of files from which to search for regions + + """ + r = Regions() + for filename in filenames: + try: + r.addRegionNetCDF4(filename) + except IOError: + for line in file(filename): + line = line.strip() + if line.startswith("#"): continue + line = line.split(",") + if len(line) == 6: + r.addRegionLatLonBounds(line[0].strip(), + line[1].strip(), + [float(line[2]),float(line[3])], + [float(line[4]),float(line[5])]) + +def MatchRelationshipConfrontation(C): + """Match relationship strings to confrontation longnames + + We allow for relationships to be studied by specifying the + confrontation longname in the configure file. This routine loops + over all defined relationships and finds the matching + confrontation. (NOTE: this really belongs inside the Scoreboard + object) + + Parameters + ---------- + C : list of ILAMB.Confrontation.Confrontation + the confrontation list + + Returns + ------- + C : list of ILAMB.Confrontation.Confrontation + the same list with relationships linked to confrontations + """ + for c in C: + if c.relationships is None: continue + for i,longname in enumerate(c.relationships): + found = False + for cor in C: + if longname.lower() == cor.longname.lower(): + c.relationships[i] = cor + found = True + return C + +def FilterConfrontationList(C,match_list): + """Filter the confrontation list + + Filter the confrontation list by requiring that at least one + string in the input list is found in the longname in the + confrontation. + + Parameters + ---------- + C : list of ILAMB.Confrontation.Confrontation + the source list of confrontations + match_list : list of str + the list of strings + + Returns + ------- + Cf : list of ILAMB.Confrontation.Confrontation + the list of filtered confrontations + """ + if len(match_list) == 0: return C + Cf = [] + for c in C: + for match in match_list: + if match in c.longname: Cf.append(c) + return Cf + +def BuildLocalWorkList(M,C): + """Build the local work list + + We enumerate a list of work by taking combinations of model + results and confrontations. This list is partitioned evenly among + processes preferring to cluster as many confrontations with the + same name together. While the work of the model-confrontation pair + is local, some post-processing operations need performed once per + confrontation. Thus we also need to flag one instance of each + confrontation as the master process. 
+ + Parameters + ---------- + M : list of ILAMB.ModelResult.ModelResult + list of models to analyze + C : list of ILAMB.Confrontation.Confrontation + list of confrontations + + Returns + ------- + localW : list of (ILAMB.ModelResult.ModelResult, ILAMB.Confrontation.Confrontation) tuples + the work local to this process + """ + + # Evenly divide up the work among processes + W = [] + for c in C: + for m in M: + W.append([m,c]) + wpp = float(len(W))/size + begin = int(round( rank *wpp)) + end = int(round((rank+1)*wpp)) + localW = W[begin:end] + + # Determine who is the master of each confrontation + for c in C: + sendbuf = np.zeros(size,dtype='int') + for w in localW: + if c is w[1]: sendbuf[rank] += 1 + recvbuf = None + if rank == 0: recvbuf = np.empty([size, sendbuf.size],dtype='int') + comm.Gather(sendbuf,recvbuf,root=0) + if rank == 0: + numc = recvbuf.sum(axis=1) + else: + numc = np.empty(size,dtype='int') + comm.Bcast(numc,root=0) + if rank == numc.argmax(): + c.master = True + else: + c.master = False + + return localW + +def WorkConfront(W,verbose=False,clean=False): + """Performs the confrontation analysis + + For each model-confrontation pair (m,c) in the input work list, + this routine will call c.confront(m) and keep track of the time + required as well as any exceptions which are thrown. + + Parameters + ---------- + W : list of (ILAMB.ModelResult.ModelResult, ILAMB.Confrontation.Confrontation) tuples + the list of work + verbose : bool, optional + enable to print output to the screen monitoring progress + clean : bool, optional + enable to perform the confrontation again, overwriting previous results + + """ + maxCL = 45; maxML = 20 + + # Run analysis on your local work model-confrontation pairs + for w in W: + m,c = w + + # if the results file exists, skip this confrontation unless we want to clean + if os.path.isfile(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name))) and clean is False: + if verbose: + print (" {0:>%d} {1:<%d} %sUsingCachedData%s " % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name) + sys.stdout.flush() + continue + + # try to run the confrontation + try: + t0 = time.time() + c.confront(m) + dt = time.time()-t0 + proc[rank] += dt + if verbose: + print (" {0:>%d} {1:<%d} %sCompleted%s {2:>5.1f} s" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,dt) + sys.stdout.flush() + + # if things do not work out, print the exception so the user has some idea + except Exception as ex: + logger.debug("[%s][%s]\n%s" % (c.longname,m.name,format_exc())) + if verbose: + print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) + +def WorkPost(M,C,W,S,verbose=False,skip_plots=False): + """Performs the post-processing + + Determines plot limits across all models, makes plots, generates + other forms of HTML output. 
+ + Parameters + ---------- + M : list of ILAMB.ModelResult.ModelResult + list of models to analyze + C : list of ILAMB.Confrontation.Confrontation + list of confrontations + W : list of (ILAMB.ModelResult.ModelResult, ILAMB.Confrontation.Confrontation) tuples + the list of work + S : ILAMB.Scoreboard.Scoreboard + the scoreboard context + verbose : bool, optional + enable to print output to the screen monitoring progress + skip_plots : bool, optional + enable to skip plotting + """ + maxCL = 45; maxML = 20 + + # work done on just the master confrontation + for c in C: c.determinePlotLimits() + + for w in W: + m,c = w + try: + t0 = time.time() + if not skip_plots: + c.modelPlots(m) + c.sitePlots(m) + c.computeOverallScore(m) + dt = time.time()-t0 + proc[rank] += dt + if verbose: + print (" {0:>%d} {1:<%d} %sCompleted%s {2:>5.1f} s" % (maxCL,maxML,OK,ENDC)).format(c.longname,m.name,dt) + sys.stdout.flush() + except Exception as ex: + logger.debug("[%s][%s]\n%s" % (c.longname,m.name,format_exc())) + if verbose: + print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) + sys.stdout.flush() + + comm.Barrier() + for c in C: + if not skip_plots: + try: + c.compositePlots() + except Exception as ex: + logger.debug("[compositePlots][%s]\n%s" % (c.longname,format_exc())) + c.generateHtml() + + comm.Barrier() + if rank==0: + S.createHtml(M) + S.createSummaryFigure(M) + +def RestrictiveModelExtents(M,eps=2.): + extents0 = np.asarray([[-90.,+90.],[-180.,+180.]]) + extents = extents0.copy() + for m in M: + for i in range(2): + extents[i,0] = max(extents[i,0],m.extents[i,0]) + extents[i,1] = min(extents[i,1],m.extents[i,1]) + diff = np.abs(extents0-extents) + extents = (diff<=eps)*extents0 + (diff>eps)*extents + return extents + +class MPIStream(): + """ + The MPI.File stream doesn't have the functions we need, so we will + wrap what we need in a simple class. + """ + def __init__(self, comm, filename, mode): + self.fh = MPI.File.Open(comm, filename, mode) + self.fh.Set_atomicity(True) + + def write(self,buf): + self.fh.Write_shared(buf) + + def flush(self): + self.fh.Sync() + + def close(self): + self.fh.Close() + +class MPIFileHandler(logging.FileHandler): + """ + A handler class which writes formatted logging records to disk files. + """ + def __init__(self, filename, mode = MPI.MODE_WRONLY|MPI.MODE_CREATE, delay = 0, comm = MPI.COMM_WORLD): + """ + Open the specified file and use it as the stream for logging. + """ + self.baseFilename = os.path.abspath(filename) + self.mode = mode + self.encoding = None + self.delay = delay + self.comm = comm + if delay: + logging.Handler.__init__(self) + self.stream = None + else: + logging.StreamHandler.__init__(self, self._open()) + + def _open(self): + """ + Open the current base file with the (original) mode and encoding. + Return the resulting stream. 
+ """ + stream = MPIStream(self.comm, self.baseFilename, self.mode ) + return stream + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--model_root', dest="model_root", metavar='root', type=str, nargs=1, default=["./"], + help='root at which to search for models') +parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, + help='path to configuration file to use') +parser.add_argument('--models', dest="models", metavar='m', type=str, nargs='+',default=[], + help='specify which models to run, list model names with no quotes and only separated by a space.') +parser.add_argument('--model_year', dest="model_year", metavar='y0 yf', type=int, nargs='+',default=[], + help='set to shift model years, "--model_year y0 yf" will shift years from y0 to yf') +parser.add_argument('--confrontations', dest="confront", metavar='c', type=str, nargs='+',default=[], + help='specify which confrontations to run, list confrontation names with no quotes and only separated by a space.') +parser.add_argument('--regions', dest="regions", metavar='r', type=str, nargs='+',default=['global'], + help='specify which regions to compute over') +parser.add_argument('--clean', dest="clean", action="store_true", + help='enable to remove analysis files and recompute') +parser.add_argument('--disable_logging', dest="logging", action="store_false", + help='disables logging') +parser.add_argument('-q','--quiet', dest="quiet", action="store_true", + help='enable to silence screen output') +parser.add_argument('--filter', dest="filter", metavar='filter', type=str, nargs=1, default=[""], + help='a string which much be in the model filenames') +parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], + help='path of where to save the output') +parser.add_argument('--define_regions', dest="define_regions", type=str, nargs='+',default=[], + help='list files containing user-defined regions') +parser.add_argument('--model_setup', dest="model_setup", type=str, nargs='+',default=None, + help='list files model setup information') +parser.add_argument('--skip_plots', dest="skip_plots", action="store_true", + help='enable to skip the plotting phase') +parser.add_argument('--rel_only', dest="rel_only", action="store_true", + help='enable only display relative differences in overall scores') +args = parser.parse_args() +if args.config is None: + if rank == 0: + print "\nError: You must specify a configuration file using the option --config\n" + comm.Barrier() + comm.Abort(1) + +# Setup regions +r = Regions() +InitializeRegions(args.define_regions) +missing = [] +for region in args.regions: + if region not in r.regions: missing.append(region) +if len(missing) > 0: + raise ValueError("Unable to find the following regions %s from the following list of possible regions %s" % (missing,r.regions)) + +# Setup study +T0 = time.time() +if args.model_setup is None: + M = InitializeModels(args.model_root[0],args.models,not args.quiet,filter=args.filter[0],model_year=args.model_year) +else: + M = ParseModelSetup(args.model_setup[0],args.models,not args.quiet,filter=args.filter[0]) +if rank == 0 and not args.quiet: print "\nParsing config file %s...\n" % args.config[0] +S = Scoreboard(args.config[0], + regions = args.regions, + master = rank==0, + verbose = not args.quiet, + build_dir = args.build_dir[0], + extents = RestrictiveModelExtents(M), + rel_only = args.rel_only) +C = MatchRelationshipConfrontation(S.list()) +Cf = 
FilterConfrontationList(C,args.confront) + +# Setup logging +logger = logging.getLogger("%i" % comm.rank) +formatter = logging.Formatter('[%(levelname)s][%(name)s][%(funcName)s]%(message)s') +logger.setLevel(logging.DEBUG) +if args.logging: + mh = MPIFileHandler('%s/ILAMB%02d.log' % (S.build_dir,len(glob.glob("%s/*.log" % S.build_dir))+1)) + mh.setFormatter(formatter) + logger.addHandler(mh) + +if rank == 0: + logger.info(" " + " ".join(os.uname())) + for key in ["ILAMB","numpy","matplotlib","netCDF4","cf_units","sympy","mpi4py"]: + pkg = __import__(key) + try: + path = pkg.__path__[0] + except: + path = key + logger.info(" %s (%s)" % (path,pkg.__version__)) + logger.info(" %s" % datetime.datetime.now()) + +if rank == 0 and not args.quiet and len(Cf) != len(C): + print "\nWe filtered some confrontations, actually running...\n" + for c in Cf: print (" {0:>45}").format(c.longname) +C = Cf + +sys.stdout.flush(); comm.Barrier() + +if rank==0 and not args.quiet: print "\nRunning model-confrontation pairs...\n" + +sys.stdout.flush(); comm.Barrier() + +W = BuildLocalWorkList(M,C) +WorkConfront(W,not args.quiet,args.clean) + +sys.stdout.flush(); comm.Barrier() + +if rank==0 and not args.quiet: print "\nFinishing post-processing which requires collectives...\n" + +sys.stdout.flush(); comm.Barrier() + +WorkPost(M,C,W,S,not args.quiet,args.skip_plots) + +sys.stdout.flush(); comm.Barrier() + +# Runtime information +proc_reduced = np.zeros(proc.shape) +comm.Reduce(proc,proc_reduced,root=0) +if size > 1: logger.info("[process time] %.1f s" % proc[rank]) +if rank==0: + logger.info("[total time] %.1f s" % (time.time()-T0)) + if size > 1: + if proc_reduced.min() > 1e-6: + logger.info("[process balance] %.2f" % (proc_reduced.max()/proc_reduced.min())) + else: + logger.info("[process balance] nan") + logger.info("[parallel efficiency] %.0f%%" % (100.*proc_reduced.sum()/float(size)/(time.time()-T0))) + +if rank==0: S.dumpScores(M,"scores.csv") + +if rank==0 and not args.quiet: print "\nCompleted in {0:>5.1f} s\n".format(time.time()-T0) diff --git a/bin/ilamb-table b/bin/ilamb-table new file mode 100644 index 00000000..374b6abb --- /dev/null +++ b/bin/ilamb-table @@ -0,0 +1,65 @@ +#!/usr/bin/env python +""" +""" +from ILAMB.Scoreboard import Scoreboard +from netCDF4 import Dataset +import os,argparse,sys + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, + help='path to configuration file to use') +parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], + help='path of where to save the output') +parser.add_argument('--csv_file', dest="csv", metavar='csv', type=str, nargs=1,default=["table.csv"], + help='destination filename for the table') + +args = parser.parse_args() +if args.config is None: + print "\nError: You must specify a configuration file using the option --config\n" + sys.exit(1) + +S = Scoreboard(args.config[0],verbose=False,build_dir=args.build_dir[0]) + +region = "global" +scalar = "RMSE" +sname = "%s %s" % (scalar,region) +group = "MeanState" +table = {} +unit = {} +for c in S.list(): + for subdir, dirs, files in os.walk(c.output_path): + for fname in files: + if not fname.endswith(".nc"): continue + with Dataset(os.path.join(c.output_path,fname)) as dset: + if group not in dset.groups .keys(): continue + if "scalars" not in dset.groups[group].groups.keys(): continue + grp = dset.groups[group]["scalars"] + if sname not in grp.variables.keys(): 
continue + var = grp.variables[sname] + if not table.has_key(c.longname): + table[c.longname] = {} + unit [c.longname] = var.units + table[c.longname][dset.name] = var[...] + +# What models have data? +models = [] +for key in table.keys(): + for m in table[key].keys(): + if m not in models: models.append(m) +models.sort() + +# render a table of values in csv format +lines = ",".join(["Name","Units"] + models) +for c in S.list(): + if not table.has_key(c.longname): continue + line = "%s,%s" % (c.longname,unit[c.longname]) + for m in models: + if table[c.longname].has_key(m): + line += ",%g" % (table[c.longname][m]) + else: + line += "," + lines += "\n%s" % line + +with file(args.csv[0],mode="w") as f: + f.write(lines) + diff --git a/demo/diurnal.cfg b/demo/diurnal.cfg new file mode 100644 index 00000000..6bc6833b --- /dev/null +++ b/demo/diurnal.cfg @@ -0,0 +1,76 @@ + +#[h1: Ecosystem and Carbon Cycle] +#bgcolor = "#ECFFE6" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +#[h2: Gross Primary Productivity] +#variable = "gpp" + +#[Ameriflux] +#source = "DATA/Ameriflux/gpp_ameriflux_hh.nc" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +#[h2: Net Ecosystem Exchange] +#variable = "nee" +#derived = "gpp-ra-rh" + +#[Ameriflux] +#source = "DATA/Ameriflux/nee_ameriflux_hh.nc" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +#[h2: Ecosystem Respiration] +#variable = "reco" +#derived = "ra+rh" + +#[Ameriflux] +#source = "DATA/Ameriflux/reco_ameriflux_hh.nc" + +########################################################################### + +[h1: Hydrology Cycle] +bgcolor = "#E6F9FF" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Latent Heat] +variable = "hfls" +ctype = ConfDiurnal + +[AmerifluxHR] +source = "DATA/Ameriflux/hfls_ameriflux_hr.nc" + +[AmerifluxHH] +source = "DATA/Ameriflux/hfls_ameriflux_hh.nc" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Sensible Heat] +variable = "hfss" +ctype = ConfDiurnal + +[AmerifluxHR] +source = "DATA/Ameriflux/hfss_ameriflux_hr.nc" + +[AmerifluxHH] +source = "DATA/Ameriflux/hfss_ameriflux_hh.nc" + +########################################################################### + +[h1: Forcings] +bgcolor = "#EDEDED" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Air Temperature] +variable = "tas" +ctype = ConfDiurnal + +[AmerifluxHR] +source = "DATA/Ameriflux/tas_ameriflux_hr.nc" + +[AmerifluxHH] +source = "DATA/Ameriflux/tas_ameriflux_hh.nc" + diff --git a/demo/ilamb.cfg b/demo/ilamb.cfg new file mode 100644 index 00000000..793227fd --- /dev/null +++ b/demo/ilamb.cfg @@ -0,0 +1,551 @@ +# This configure file uses observational data which can be obtained by +# running the following command after exporting ILAMB_ROOT to the +# appropriate location. 
+# +# ilamb-fetch --remote_root http://ilamb.ornl.gov/ILAMB-Data +# +[h1: Ecosystem and Carbon Cycle] +bgcolor = "#ECFFE6" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Biomass] +variable = "biomass" +alternate_vars = "cVeg" +weight = 5 +skip_rmse = True +mass_weighting = True + +[GEOCARBON] +source = "DATA/biomass/GEOCARBON/biomass_0.5x0.5.nc" +weight = 20 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False + +[Tropical] +source = "DATA/biomass/Tropical/biomass_0.5x0.5.nc" +weight = 20 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False + +[GlobalCarbon] +source = "DATA/biomass/GLOBAL.CARBON/biomass_0.5x0.5.nc" +weight = 16 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False + +[NBCD2000] +source = "DATA/biomass/NBCD2000/biomass_0.5x0.5.nc" +weight = 8 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False + +[USForest] +source = "DATA/biomass/US.FOREST/biomass_0.5x0.5.nc" +weight = 8 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Burned Area] +variable = "burntArea" +weight = 4 +cmap = "OrRd" +mass_weighting = True + +[GFED4S] +source = "DATA/burntArea/GFED4S/burntArea_0.5x0.5.nc" +weight = 20 +relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Gross Primary Productivity] +variable = "gpp" +cmap = "Greens" +weight = 5 +mass_weighting = True + +[Fluxnet] +source = "DATA/gpp/FLUXNET/gpp.nc" +weight = 9 +table_unit = "g m-2 d-1" +plot_unit = "g m-2 d-1" + +[GBAF] +source = "DATA/gpp/GBAF/gpp_0.5x0.5.nc" +weight = 15 +table_unit = "Pg yr-1" +plot_unit = "g m-2 d-1" +space_mean = False +skip_iav = True +relationships = "Evapotranspiration/GLEAM","Precipitation/GPCP2","SurfaceDownwardSWRadiation/CERES","SurfaceNetSWRadiation/CERES","SurfaceAirTemperature/CRU" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Leaf Area Index] +variable = "lai" +cmap = "Greens" +weight = 3 +mass_weighting = True + +[AVHRR] +source = "DATA/lai/AVHRR/lai_0.5x0.5.nc" +weight = 15 +relationships = "Precipitation/GPCP2" + +[MODIS] +source = "DATA/lai/MODIS/lai_0.5x0.5.nc" +weight = 15 +relationships = "Precipitation/GPCP2" +skip_iav = True + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Global Net Ecosystem Carbon Balance] +variable = "nbp" +weight = 5 +ctype = "ConfNBP" + +[GCP] +source = "DATA/nbp/GCP/nbp_1959-2012.nc" +weight = 20 + +[Hoffman] +source = "DATA/nbp/HOFFMAN/nbp_1850-2010.nc" +weight = 20 +skip_taylor = True + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Net Ecosystem Exchange] +variable = "nee" +derived = "gpp-ra-rh" +weight = 5 +mass_weighting = True + +[Fluxnet] +source = "DATA/nee/FLUXNET/nee.nc" +weight = 9 +table_unit = "g m-2 d-1" +plot_unit = "g m-2 d-1" + +[GBAF] +source = "DATA/nee/GBAF/nee_0.5x0.5.nc" +weight = 4 +table_unit = "Pg yr-1" +plot_unit = "g m-2 d-1" +space_mean = False +skip_iav = True + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Ecosystem Respiration] +variable = "reco" +derived = "ra+rh" +weight = 4 +mass_weighting = True + +[Fluxnet] +source = "DATA/reco/FLUXNET/reco.nc" +weight = 6 +table_unit = "g m-2 d-1" +plot_unit = "g m-2 d-1" + +[GBAF] +source = "DATA/reco/GBAF/reco_0.5x0.5.nc" +weight = 4 +table_unit = "Pg yr-1" +plot_unit = "g m-2 d-1" 
+space_mean = False +skip_iav = True + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Soil Carbon] +variable = "cSoil" +alternate_vars = "soilc" +weight = 5 +mass_weighting = True + +[HWSD] +source = "DATA/soilc/HWSD/soilc_0.5x0.5.nc" +weight = 15 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False +skip_rmse = True + +[NCSCDV22] +source = "DATA/soilc/NCSCDV22/soilc_0.5x0.5.nc" +weight = 12 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False +skip_rmse = True + +########################################################################### + +[h1: Hydrology Cycle] +bgcolor = "#E6F9FF" + +[h2: Evapotranspiration] +variable = "et" +alternate_vars = "evspsbl" +cmap = "Blues" +weight = 5 +mass_weighting = True + +[GLEAM] +source = "DATA/et/GLEAM/et_0.5x0.5.nc" +weight = 15 +table_unit = "mm d-1" +plot_unit = "mm d-1" +relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" + +[MODIS] +source = "DATA/et/MODIS/et_0.5x0.5.nc" +weight = 15 +table_unit = "mm d-1" +plot_unit = "mm d-1" +relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Evaporative Fraction] +variable = "EvapFrac" +weight = 5 +mass_weighting = True +ctype = "ConfEvapFraction" + +[GBAF] +source = "DATA/EvapFrac/GBAF/EvapFrac_0.5x0.5.nc" +weight = 9 +skip_rmse = True +skip_iav = True +limit_type = "99per" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Latent Heat] +variable = "hfls" +alternate_vars = "le" +cmap = "Oranges" +weight = 5 +mass_weighting = True + +[Fluxnet] +source = "DATA/le/FLUXNET/le.nc" +weight = 3 + +[GBAF] +source = "DATA/le/GBAF/le_0.5x0.5.nc" +land = True +weight = 9 +skip_iav = True + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Runoff] +variable = "runoff" +alternate_vars = "mrro" +weight = 5 + +[Dai] +ctype = "ConfRunoff" +source = "DATA/runoff/Dai/runoff.nc" +weight = 15 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Sensible Heat] +variable = "hfss" +alternate_vars = "sh" +weight = 2 +mass_weighting = True + +[Fluxnet] +source = "DATA/sh/FLUXNET/sh.nc" +weight = 9 + +[GBAF] +source = "DATA/sh/GBAF/sh_0.5x0.5.nc" +weight = 15 +skip_iav = True + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Terrestrial Water Storage Anomaly] +variable = "twsa" +alternate_vars = "tws" +derived = "pr-evspsbl-mrro" +cmap = "Blues" +weight = 5 +ctype = "ConfTWSA" + +[GRACE] +source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" +weight = 25 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Snow Water Equivalent] +variable = "swe" +alternate_vars = "snw" +cmap = "Blues" +weight = 5 + +[CanSISE] +source = "DATA/swe/CanSISE/swe.nc" +weight = 25 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Permafrost] +variable = "tsl" + +[NSIDC] +ctype = "ConfPermafrost" +source = "DATA/permafrost/NSIDC/NSIDC_0.5x0.5.nc" +y0 = 1970. +yf = 2000. 
+Teps = 273.15 +dmax = 3.5 + +########################################################################### + +[h1: Radiation and Energy Cycle] +bgcolor = "#FFECE6" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Albedo] +variable = "albedo" +derived = "rsus/rsds" +weight = 1 + +[CERES] +source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" +weight = 20 + +[GEWEX.SRB] +source = "DATA/albedo/GEWEX.SRB/albedo_0.5x0.5.nc" +weight = 20 + +[MODIS] +source = "DATA/albedo/MODIS/albedo_0.5x0.5.nc" +weight = 20 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Upward SW Radiation] +variable = "rsus" +weight = 1 + +[CERES] +source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" +weight = 15 + +[GEWEX.SRB] +source = "DATA/rsus/GEWEX.SRB/rsus_0.5x0.5.nc" +weight = 15 + +[WRMC.BSRN] +source = "DATA/rsus/WRMC.BSRN/rsus.nc" +weight = 12 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Net SW Radiation] +variable = "rsns" +derived = "rsds-rsus" +weight = 1 + +[CERES] +source = "DATA/rsns/CERES/rsns_0.5x0.5.nc" +weight = 15 + +[GEWEX.SRB] +source = "DATA/rsns/GEWEX.SRB/rsns_0.5x0.5.nc" +weight = 15 + +[WRMC.BSRN] +source = "DATA/rsns/WRMC.BSRN/rsns.nc" +weight = 12 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Upward LW Radiation] +variable = "rlus" +weight = 1 + +[CERES] +source = "DATA/rlus/CERES/rlus_0.5x0.5.nc" +weight = 15 + +[GEWEX.SRB] +source = "DATA/rlus/GEWEX.SRB/rlus_0.5x0.5.nc" +weight = 15 + +[WRMC.BSRN] +source = "DATA/rlus/WRMC.BSRN/rlus.nc" +weight = 12 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Net LW Radiation] +variable = "rlns" +derived = "rlds-rlus" +weight = 1 + +[CERES] +source = "DATA/rlns/CERES/rlns_0.5x0.5.nc" +weight = 15 + +[GEWEX.SRB] +source = "DATA/rlns/GEWEX.SRB/rlns_0.5x0.5.nc" +weight = 15 + +[WRMC.BSRN] +source = "DATA/rlns/WRMC.BSRN/rlns.nc" +weight = 12 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Net Radiation] +variable = "rns" +derived = "rlds-rlus+rsds-rsus" +weight = 2 + +[CERES] +source = "DATA/rns/CERES/rns_0.5x0.5.nc" +weight = 15 + +[Fluxnet] +source = "DATA/rns/FLUXNET/rns.nc" +weight = 12 + +[GEWEX.SRB] +source = "DATA/rns/GEWEX.SRB/rns_0.5x0.5.nc" +weight = 15 + +[WRMC.BSRN] +source = "DATA/rns/WRMC.BSRN/rns.nc" +weight = 12 + +########################################################################### + +[h1: Forcings] +bgcolor = "#EDEDED" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Air Temperature] +variable = "tas" +weight = 2 + +[CRU] +source = "DATA/tas/CRU/tas_0.5x0.5.nc" +weight = 25 + +[Fluxnet] +source = "DATA/tas/FLUXNET/tas.nc" +weight = 9 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Precipitation] +variable = "pr" +cmap = "Blues" +weight = 2 +mass_weighting = True + +[CMAP] +source = "DATA/pr/CMAP/pr_0.5x0.5.nc" +land = True +weight = 20 +table_unit = "mm d-1" +plot_unit = "mm d-1" +space_mean = True + +[Fluxnet] +source = "DATA/pr/FLUXNET/pr.nc" +land = True +weight = 9 +table_unit = "mm d-1" +plot_unit = "mm d-1" + +[GPCC] +source = "DATA/pr/GPCC/pr_0.5x0.5.nc" +land = True +weight = 20 +table_unit = "mm d-1" +plot_unit = "mm d-1" +space_mean = True + +[GPCP2] +source = "DATA/pr/GPCP2/pr_0.5x0.5.nc" +land = True +weight = 20 +table_unit = "mm d-1" +plot_unit = "mm d-1" +space_mean = True + 
+#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Relative Humidity] +variable = "rhums" +alternate_vars = "hurs" +cmap = "Blues" +weight = 3 +mass_weighting = True + +[ERA] +source = "DATA/rhums/ERA/rhums_0.5x0.5.nc" +weight = 10 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Downward SW Radiation] +variable = "rsds" +weight = 2 + +[CERES] +source = "DATA/rsds/CERES/rsds_0.5x0.5.nc" +weight = 15 + +[Fluxnet] +source = "DATA/rsds/FLUXNET/rsds.nc" +weight = 12 + +[GEWEX.SRB] +source = "DATA/rsds/GEWEX.SRB/rsds_0.5x0.5.nc" +weight = 15 + +[WRMC.BSRN] +source = "DATA/rsds/WRMC.BSRN/rsds.nc" +weight = 12 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Downward LW Radiation] +variable = "rlds" +weight = 1 + +[CERES] +source = "DATA/rlds/CERES/rlds_0.5x0.5.nc" +weight = 15 + +[GEWEX.SRB] +source = "DATA/rlds/GEWEX.SRB/rlds_0.5x0.5.nc" +weight = 15 + +[WRMC.BSRN] +source = "DATA/rlds/WRMC.BSRN/rlds.nc" +weight = 12 diff --git a/demo/iomb.cfg b/demo/iomb.cfg new file mode 100644 index 00000000..a534397b --- /dev/null +++ b/demo/iomb.cfg @@ -0,0 +1,65 @@ +# This configure file uses observational data which can be obtained by +# running the following command after exporting ILAMB_ROOT to the +# appropriate location. +# +# ilamb-fetch --remote_root http://ilamb.ornl.gov/IOMB-Data +# +[h1: Marine Chemistry] +bgcolor = "#ECFFE6" + +[h2: Nitrate] +variable = "no3" +alternate_vars = "NO3" +cmap = "PuBu" +ctype = "ConfIOMB" + +[WOA] +source = "DATA/nitrate/WOA/nitrate.nc" + +[h2: Phosphate] +variable = "po4" +alternate_vars = "PO4" +cmap = "Oranges" +ctype = "ConfIOMB" + +[WOA] +source = "DATA/phosphate/WOA/phosphate.nc" + +[h2: Silicate] +variable = "si" +alternate_vars = "SiO3" +cmap = "BuPu" +ctype = "ConfIOMB" + +[WOA] +source = "DATA/silicate/WOA/silicate.nc" + +[h1: Physical Quantities] +bgcolor = "#FFECE6" + +[h2: Salinity] +variable = "so" +alternate_vars = "SALT" +cmap = "GnBu" +ctype = "ConfIOMB" + +[WOA] +source = "DATA/salinity/WOA/salinity.nc" + +[h2: Oxygen] +variable = "o2" +alternate_vars = "O2" +cmap = "GnBu" +ctype = "ConfIOMB" + +[WOA] +source = "DATA/oxygen/WOA/oxygen.nc" + +[h2: Temperature] +variable = "thetao" +alternate_vars = "TEMP" +cmap = "rainbow" +ctype = "ConfIOMB" + +[WOA] +source = "DATA/temperature/WOA/temperature.nc" diff --git a/demo/sample.cfg b/demo/sample.cfg new file mode 100644 index 00000000..0e3abaa9 --- /dev/null +++ b/demo/sample.cfg @@ -0,0 +1,18 @@ +# This configure file specifies the variables + +[h1: Radiation and Energy Cycle] +bgcolor = "#FFECE6" + +[h2: Surface Upward SW Radiation] +variable = "rsus" + +[CERES] +source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" + +[h2: Albedo] +variable = "albedo" +derived = "rsus/rsds" + +[CERES] +source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" + diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 00000000..5fff5947 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,186 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. 
Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: ilamb help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext + +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +ilamb: + pushd ../; python setup.py install --user; popd + +clean: + rm -rf $(BUILDDIR)/* _generated + +html: ilamb + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." + +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." 
+ +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/TestPkg.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/TestPkg.qhc" + +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/TestPkg" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/TestPkg" + @echo "# devhelp" + +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." + +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/doc/_templates/class.rst b/doc/_templates/class.rst new file mode 100644 index 00000000..714edba7 --- /dev/null +++ b/doc/_templates/class.rst @@ -0,0 +1,25 @@ +{% extends "!autosummary/class.rst" %} + +{% block methods %} +{% if methods %} + .. autosummary:: + :toctree: + {% for item in all_methods %} + {%- if not item.startswith('_') %} + {{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} +{% endif %} +{% endblock %} + +{% block attributes %} +{% if attributes %} + .. autosummary:: + :toctree: + {% for item in all_attributes %} + {%- if not item.startswith('_') %} + {{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} +{% endif %} +{% endblock %} diff --git a/doc/add_data.rst b/doc/add_data.rst new file mode 100644 index 00000000..f1f66c15 --- /dev/null +++ b/doc/add_data.rst @@ -0,0 +1,150 @@ +Adding a Benchmark Dataset +========================== + +The following tutorial builds on the *First Steps* `tutorial +<./first_steps.html>`_ by describing how additional datasets may be +added to our sample benchmark comparison. We will add a Surface Upward +Shortwave Radiation dataset from the the central archive of the +Baseline Surface Radiation Network (BSRN) on the World Radiation +Monitoring Center (WRMC). We have provided a file in the appropriate +format `here `_. We +suggest that you create a directory inside the ``rsus`` directory +called ``WRMC.BSRN`` and place the downloaded file inside. We will +show the appropriate part of the tree here:: + + DATA/ + ├── albedo + │ └── CERES + │ └── albedo_0.5x0.5.nc + └── rsus + ├── CERES + │ └── rsus_0.5x0.5.nc + └── WRMC.BSRN + └── rsus.nc + +To add this dataset to our benchmarks, we only need to add a new line +to ``sample.cfg`` under the ``h2`` heading which corresponds to +Surface Upward Shortwave Radiation. Here we show only the portion of +the configure file which pertains to this variable with the new +dataset addition:: + + [h2: Surface Upward SW Radiation] + variable = "rsus" + + [CERES] + source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" + + [WRMC.BSRN] + source = "DATA/rsus/WRMC.BSRN/rsus.nc" + +Now if we execute the ``ilamb-run`` script as before:: + + ilamb-run --config sample.cfg --model_root $ILAMB_ROOT/MODELS/ --regions global + +we will see the following output to the screen:: + + Searching for model results in /home/ncf/sandbox/ILAMB_sample//MODELS/ + + CLM40cn + + Parsing config file sample.cfg... + + SurfaceUpwardSWRadiation/CERES Initialized + SurfaceUpwardSWRadiation/WRMC.BSRN Initialized + Albedo/CERES Initialized + + Running model-confrontation pairs... + + SurfaceUpwardSWRadiation/CERES CLM40cn UsingCachedData + SurfaceUpwardSWRadiation/WRMC.BSRN CLM40cn Completed 1.0 s + Albedo/CERES CLM40cn UsingCachedData + + Finishing post-processing which requires collectives... + + SurfaceUpwardSWRadiation/CERES CLM40cn Completed 6.4 s + SurfaceUpwardSWRadiation/WRMC.BSRN CLM40cn Completed 6.3 s + Albedo/CERES CLM40cn Completed 6.8 s + + Completed in 29.0 s + +You will notice that on running the script again, we did not have to +perform the analysis step for the confrontations we ran +previously. When a model-confrontation pair is run, we save the +analysis information in a netCDF4 file. If this file is detected in +the setup process, then we will use the results from the file and skip +the analysis step. The plotting, however, is repeated. + +You will also notice that the new ``rsus`` dataset we added ran much +more quickly than the CERES dataset. 
This is because the new dataset +is only defined at 55 specific sites as opposed to the whole globe at +half degree resolution. Despite the difference in these datasets, the +interface into the system (that is, the configuration file entry) is +the same. This represents an element of our design philosophy--the +benchmark datasets should contain sufficient information so that the +appropriate commensurate information from the model may be +extracted. When we open the ``WRMC.BSRN`` dataset, we detect that the +desired variable is defined over datasites. From this we can then +automatically sample the model results, extracting information from +the appropriate gridcells. + +Weighting Datasets +------------------ + +To view the results of the new dataset, look inside the ``_build`` +directory and open a file called ``index.html`` in your favorite web +browser. You should see a webpage entitled *ILAMB Benchmark Results* +and a series of three tabs, the middle of which is entitled *Results +Table*. If you click on the row of the table which bears the name +*Surface Upward SW Radiation* you will see that the row expands to +reveal how individual datasets contributed to the overall score for +this variable. Here we reproduce this portion of the table. + +=========================== ======= +Dataset CLM40cn +=========================== ======= +Surface Upward SW Radiation 0.77 + CERES (50.0%) 0.79 + WRMC.BSRN (50.0%) 0.74 +=========================== ======= + +The values you get for scores may vary from this table as our scoring +methodology is in flux as we develop and hone it. The main point here +is that we have weighted each dataset equally, as seen in the +percentages listed after each dataset name. While this is a reasonable +default, it is unlikely as you add datasets that you will have equal +confidence in their quality. To address this, we provide you with a +method of weighting datasets in the configuration file. For the sake +of demonstration, let us assume that we are five times as confident in +the CERES data. This we can express by modifying the relevant section +of the configuration file:: + + [h2: Surface Upward SW Radiation] + variable = "rsus" + + [CERES] + source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" + weight = 5 + + [WRMC.BSRN] + source = "DATA/rsus/WRMC.BSRN/rsus.nc" + weight = 1 + +and then running the script as before. This will run quickly as we do +not require a reanalysis for a mere change of weights. Once the run is +complete, open again or reload ``_build/index.html`` and navigate to +the same section of the results table. You should see the change in +weight reflected in the percentages as well as in the overall score +for the variable. + +=========================== ======= +Dataset CLM40cn +=========================== ======= +Surface Upward SW Radiation 0.78 + CERES (83.3%) 0.79 + WRMC.BSRN (16.7%) 0.74 +=========================== ======= + +You may notice that if you apply the weighting by hand based on the +output printed in the table, that you appear to get a different +result. This is because the HTML table output is rounded for display +purposes, but the scores are computed and weighted in full precision. diff --git a/doc/add_model.rst b/doc/add_model.rst new file mode 100644 index 00000000..e3bd2b10 --- /dev/null +++ b/doc/add_model.rst @@ -0,0 +1,117 @@ +Adding a Model +============== + +The following tutorial builds on the `previous <./first_steps.html>`_ +by describing how another model's results may be added to the +benchmarking results for CLM40cn. 
At this point, we suggest that you +try to incoporate model result data of your own. At a minimum you will +need to have the ``rsus`` and ``rsds`` variables expressed as monthly +mean values at least partially over the time period of the source data +(2000-2012). In the event you do not have model data of your own, this +tutorial will copy the CLM40cn results data and treat it as another +model as a demonstration only. + +The main concept you need to understand is how ``ilamb-run`` finds and +classifies model results. When executing ``ilamb-run`` in the previous +tutorial, we specified an option ``--model_root +$ILAMB_ROOT/MODELS/``. This tells the script where to look for model +results. The script will consider each subdirectory of the specified +directory as a separate model result. So for example, if we copy the +CLM40cn results into a new directory represented in the following +tree:: + + ./MODELS + ├── CLM40cn + │ ├── rsds + │ │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + │ └── rsus + │ └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + └── CLMCopy + ├── rsds + │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + └── rsus + └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + +Then when we execute the same ``ilamb-run`` command as before:: + + ilamb-run --config sample.cfg --model_root $ILAMB_ROOT/MODELS/ --regions global + +We observe that the new model is indeed found and the confrontations +are run. Here we reproduce the screen output:: + + Searching for model results in /home/ncf/sandbox/ILAMB_sample/MODELS/ + + CLM40cn + CLMCopy + + Parsing config file sample.cfg... + + SurfaceUpwardSWRadiation/CERES Initialized + Albedo/CERES Initialized + + Running model-confrontation pairs... + + SurfaceUpwardSWRadiation/CERES CLM40cn UsingCachedData + SurfaceUpwardSWRadiation/CERES CLMCopy Completed 38.4 s + Albedo/CERES CLM40cn UsingCachedData + Albedo/CERES CLMCopy Completed 39.9 s + + Finishing post-processing which requires collectives... + + SurfaceUpwardSWRadiation/CERES CLM40cn Completed 3.8 s + SurfaceUpwardSWRadiation/CERES CLMCopy Completed 3.0 s + Albedo/CERES CLM40cn Completed 3.9 s + Albedo/CERES CLMCopy Completed 3.8 s + + Completed in 92.8 s + +You will notice that on executing the run script again, we did not have to +perform the analysis step for the model we ran in the previous +tutorial. When a model-confrontation pair is run, we save the analysis +information in a netCDF4 file. If this file is detected in the setup +process, then we will use the results from the file and skip the +analysis step. The plotting, however, is repeated. This is because +adding extra models will possible change the limits on the plots and +thus must be rendered again. + +You have a great deal of flexibility as to how results are saved. That +is, they need not exist in separate files within subdirectories +bearing the name of the variable which they represent. We could, for +example, move the sample data around in the following way:: + + ./MODELS + ├── CLM40cn + │ ├── rsds + │ │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + │ └── rsus + │ └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + └── CLMCopy + └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + +and the run script will interpret the model in exactly the same +manner. The variables can even be in the same file or across multiple +files representing different sections of the simulation time. 
We will +detect which variables are in which files, and combine them +automatically. The only real requirement is that all the files be +located under a directory bearing the model's name. This directory +could even be a symbolic link. On my personal machine, I have data +from a CLM45bgc run saved. So I can create a symbolic link from my +``MODELS`` directory to the location on my local machine:: + + ./MODELS + ├── CLM40cn + │ ├── rsds + │ │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + │ └── rsus + │ └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + ├── CLM45bgc -> /work/ILAMB/MODELS/CLM/CLM45bgc/ + └── CLMCopy + └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + +and the run script will follow this link and perform the analysis on the +result files it finds there. This allows you to create a group of +models which you wish to study without having to move results around +your machine. + diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 00000000..3287ccc1 --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,357 @@ +# -*- coding: utf-8 -*- +# +# ILAMB documentation build configuration file, created by +# sphinx-quickstart on Sat Oct 25 15:45:20 2014. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.coverage', + 'sphinx.ext.mathjax', + 'sphinx.ext.viewcode', + 'sphinxcontrib.napoleon' +] + +autosummary_generate = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'ILAMB' +copyright = u'2014, Nathan Collier, Forrest Hoffman' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +import ILAMB +version = ILAMB.__version__ +# The full version, including alpha/beta/rc tags. +release = ILAMB.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. 
+language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build','_templates'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinxdoc' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = [] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. 
+#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'ILAMBdoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'ILAMB.tex', u'ILAMB Documentation', + u'Nathan Collier, Forrest Hoffman', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'testpkg', u'ILAMB Documentation', + [u'Nathan Collier, Forrest Hoffman'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'ILAMB', u'ILAMB Documentation', + u'Nathan Collier, Forrest Hoffman', 'ILAMB', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. 
+#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False + + +# -- Options for Epub output ---------------------------------------------- + +# Bibliographic Dublin Core info. +epub_title = u'ILAMB' +epub_author = u'Nathan Collier, Forrest Hoffman' +epub_publisher = u'Nathan Collier, Forrest Hoffman' +epub_copyright = u'2014, Nathan Collier, Forrest Hoffman' + +# The basename for the epub file. It defaults to the project name. +#epub_basename = u'ILAMB' + +# The HTML theme for the epub output. Since the default themes are not optimized +# for small screen space, using the same theme for HTML and epub output is +# usually not wise. This defaults to 'epub', a theme designed to save visual +# space. +#epub_theme = 'epub' + +# The language of the text. It defaults to the language option +# or 'en' if the language is not set. +#epub_language = '' + +# The scheme of the identifier. Typical schemes are ISBN or URL. +#epub_scheme = '' + +# The unique identifier of the text. This can be a ISBN number +# or the project homepage. +#epub_identifier = '' + +# A unique identification for the text. +#epub_uid = '' + +# A tuple containing the cover image and cover page html template filenames. +#epub_cover = () + +# A sequence of (type, uri, title) tuples for the guide element of content.opf. +#epub_guide = () + +# HTML files that should be inserted before the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_pre_files = [] + +# HTML files shat should be inserted after the pages created by sphinx. +# The format is a list of tuples containing the path and title. +#epub_post_files = [] + +# A list of files that should not be packed into the epub file. +epub_exclude_files = ['search.html'] + +# The depth of the table of contents in toc.ncx. +#epub_tocdepth = 3 + +# Allow duplicate toc entries. +#epub_tocdup = True + +# Choose between 'default' and 'includehidden'. +#epub_tocscope = 'default' + +# Fix unsupported image types using the PIL. +#epub_fix_images = False + +# Scale large images. +#epub_max_image_width = 0 + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#epub_show_urls = 'inline' + +# If false, no index is generated. +#epub_use_index = True diff --git a/doc/confront.rst b/doc/confront.rst new file mode 100644 index 00000000..ac6dcde3 --- /dev/null +++ b/doc/confront.rst @@ -0,0 +1,294 @@ +Custom Confrontations +===================== + +The ``Confrontation`` object we described in the previous tutorial is +implemented as a python class. This tutorial will assume you have some +familiarity with python and classes. We will try to make the concepts +easy to follow, but if you find that you need to learn about *class* +basics, we recommend the python documentation on them `here +`_. + +In this tutorial we will explain the implementation of a custom +``Confrontation`` by way of example. We will detail the code that we +use in the ILAMB system for benchmarking the global net ecosystem +carbon balance. The generic ``Confrontation`` will not work in this +case because: + +* There is no variable in the model outputs which directly compares to + the benchmark datasets. The variable ``nbp`` must be integrated over + the globe for it to be comparable. +* The analysis we want to perform is different than our standard mean + state analysis. 
We will compare the bias and RMSE of the integrated + quantity, but then we would like to also view the accumulation of + carbon over the time period. + +So we have some special-purpose code to write. We will present here +the implementation bit by bit and explain each function and +section. However, if you are following along and implementing the +class as you read, we recommend you look at the original source which +may be found on our `bitbucket +`_ +site. This is because the amount of tab space gets shifted in the +generation of this document. I will also omit the documentation +strings and imports here to keep the code short. + +The Constructor +--------------- + +The first thing we will do, is define a new class ``ConfNBP`` which +will be derived from the ``Confrontation`` base class. This means that +all the methods and member data of the ``Confrontation`` class will be +part of ``ConfNBP`` automatically. This is helpful, as it means that +the developer only needs to rewrite the functions that must behave +differently in his benchmark. So we define our class by writing:: + + class ConfNBP(Confrontation): + + def __init__(self,**keywords): + + # Ugly, but this is how we call the Confrontation constructor + super(ConfNBP,self).__init__(**keywords) + + # Now we overwrite some things which are different here + self.regions = ['global'] + +We place this class in a file which bears the name of the class +itself, ``ConfNBP.py``. The ``__init__`` function is what is known as +the constructor. A class can be thought of as a template and the +constructor is the function which runs when a new instance is +created. If I were to type:: + + a = Confrontation() + b = Confrontation() + +I would be creating two instances (``a`` and ``b``) of the +``Confrontation`` class and the constructor would run separately for +each of them. The constructor for the ``Confrontation`` class takes in +keywords as arguments. This means that instead of requiring users to +place arguments in a defined order, we allow them to specify arguments +by their names. We did this in the previous tutorial, when we +initialized a ``Confrontation`` in the following way:: + + c = Confrontation(source = os.environ["ILAMB_ROOT"] + "/DATA/rsus/CERES/rsus_0.5x0.5.nc", + name = "CERES", + variable = "rsus") + +The keywords we used here were ``source``, ``name``, and +``variable``. You have a lot of control over what a ``Confrontation`` +does via these keywords. A full list of them is available in the +`documentation +<_generated/ILAMB.Confrontation.Confrontation.html>`_. For the most +part, we want to use the ``Confrontation`` constructor as it is, and +so we could just leave the ``__init__`` function +unimplemented. However, one of the keywords of the ``Confrontation`` +constructor is not valid in our benchmark--the ``regions`` +keyword. This is a keyword where a user may specify a list of GFED +regions over which we will perform the analysis. In the case of our +``ConfNBP``, this is not a valid option as the benchmark data is +integrated over the globe. + +For this reason, we implement our own ``__init__`` function where we +manually call the constructor of the ``Confrontation`` class. This is +handled by using the python function ``super``. This references the +super object of our ``ConfNBP`` object and allows us to manually call +its constructor. After this constructor has run, we simply overwrite +the value of the ``regions`` member data to be the only valid value. 
+
+Staging Data
+------------
+
+We need to implement our own ``stageData`` functionality as models do
+not provide us with the integrated ``nbp`` directly. We will go over
+its implementation here in pieces. First we get the observational
+dataset::
+
+    def stageData(self,m):
+
+        # get the observational data
+        obs = Variable(filename       = self.source,
+                       variable_name  = self.variable,
+                       alternate_vars = self.alternate_vars)
+
+So you will see first that the function signature has a ``self``
+argument. This will be true of all member functions of our class. This
+is a special argument which is used to access all the member data of
+the class itself. The second argument is ``m`` which is an instance of
+a ``ModelResult``. Just below we use the ``Variable`` constructor to
+extract the source data for this benchmark using member data of our
+class. The member data ``source`` refers to the full path of the
+benchmark dataset, ``variable`` is the name of the variable to extract
+from within, and ``alternate_vars`` is a list of alternative variable
+names which we can accept. By convention, we use the ``obs`` name to
+refer to the returned ``Variable``. Next, we need to extract the same
+data from the model::
+
+        # the model data needs integrated over the globe
+        mod = m.extractTimeSeries(self.variable,
+                                  alt_vars = self.alternate_vars)
+        mod = mod.integrateInSpace().convert(obs.unit)
+
+        obs,mod = il.MakeComparable(obs,mod,clip_ref=True)
+
+        # sign convention is backwards
+        obs.data *= -1.
+        mod.data *= -1.
+
+        return obs,mod
+
+Here we use the ``ModelResult`` instance ``m`` to extract the
+variable, and immediately integrate it over all space. We also ensure
+that the units match the observations and, again by convention, we
+refer to the result in a variable we call ``mod``. Then we use the
+function `MakeComparable <_generated/ILAMB.ilamblib.MakeComparable.html>`_
+to ensure that both the ``obs`` and ``mod`` variables are on the same
+time frame, trimming away the non-overlapping times. Finally we
+multiply the data associated with the observations and models by
+negative one because of an unwanted sign convention.
+
+The main concept of the ``stageData`` function is that you are passed
+a ``ModelResult`` and you need to return two ``Variables`` which
+represent comparable quantities from the observational and model
+datasets. The ILAMB system does not care how you came about these
+quantities. Here we have used more of the ILAMB package to create the
+quantities we wish to compare. However, you may prefer to use other
+tools or even interface to more complex methods of extracting relevant
+information. The ILAMB package simply defines an interface which makes
+the results of such data manipulation usable in a consistent system.
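+
+If you are developing a ``stageData`` of your own, it can be useful to
+sanity check the two returned variables interactively before moving on
+to the analysis. A minimal sketch, assuming ``c`` is an instance of
+``ConfNBP`` and ``m`` is one of the ``ModelResult`` objects that
+``ilamb-run`` builds from your model directories, might be::
+
+    obs,mod = c.stageData(m)
+    print("%s %s" % (obs.unit,mod.unit))              # units now agree
+    print("%s %s" % (obs.data.shape,mod.data.shape))  # same overlapping time period
+
+Anything which looks wrong at this stage will only be harder to
+diagnose once the full analysis and plotting machinery is running.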
+
+Confront
+--------
+
+We also need to implement our own ``confront`` functionality. This is
+because most of our `mean state
+<./_generated/ILAMB.ilamblib.AnalysisMeanState.html>`_ analysis is not
+relevant for our benchmark, and we would like to study the
+accumulation of carbon, which is not part of that procedure. As before
+we will break up the ``confront`` function we implemented and explain
+it in sections::
+
+    def confront(self,m):
+
+        # Grab the data
+        obs,mod = self.stageData(m)
+
+As with the ``stageData`` function, the ``confront`` function takes in
+a ``ModelResult`` instance ``m`` and immediately calls the
+``stageData`` function we just implemented. The observational dataset
+and model result are returned, represented as ``Variables``, and named
+``obs`` and ``mod``, respectively. For both datasets, we want to study
+the accumulated amount of carbon over the time period::
+
+        obs_sum = obs.accumulateInTime().convert("Pg")
+        mod_sum = mod.accumulateInTime().convert("Pg")
+
+as well as compare the mean values over the time period::
+
+        obs_mean = obs.integrateInTime(mean=True)
+        mod_mean = mod.integrateInTime(mean=True)
+
+and then the bias and RMSE::
+
+        bias = obs.bias(mod)
+        rmse = obs.rmse(mod)
+
+The functions ``accumulateInTime``, ``convert``, ``integrateInTime``,
+``bias``, and ``rmse`` are all member functions of the `Variable
+<_generated/ILAMB.Variable.Variable.html>`_ class. So you can see that
+this keeps the analysis clean, short, and human readable. This handles
+the majority of the analysis which we want to perform in this
+confrontation. However, the ILAMB system is geared towards determining
+a score from the analysis results. In this case, we will score a model
+based on the bias and the RMSE in the following way:
+
+ .. math:: S_{\text{bias}} = e^{-\left| \frac{\int \left(obs(t) - mod(t)\right)\ dt }{\int obs(t)\ dt } \right|}
+ .. math:: S_{\text{RMSE}} = e^{-\sqrt{ \frac{\int \left(obs(t) - mod(t)\right)^2\ dt }{\int obs(t)^2\ dt } }}
+
+This is accomplished in the following way::
+
+        obs_L1     = obs.integrateInTime()
+        dif_L1     = deepcopy(obs)
+        dif_L1.data -= mod.data
+        dif_L1     = dif_L1.integrateInTime()
+        bias_score = Variable(name = "Bias Score global",
+                              unit = "1",
+                              data = np.exp(-np.abs(dif_L1.data/obs_L1.data)))
+
+for the bias score and::
+
+        obs_L2      = deepcopy(obs)
+        obs_L2.data *= obs_L2.data
+        obs_L2      = obs_L2.integrateInTime()
+        dif_L2      = deepcopy(obs)
+        dif_L2.data = (dif_L2.data-mod.data)**2
+        dif_L2      = dif_L2.integrateInTime()
+        rmse_score  = Variable(name = "RMSE Score global",
+                               unit = "1",
+                               data = np.exp(-np.sqrt(dif_L2.data/obs_L2.data)))
+
+for the RMSE score. The code here is a bit uglier than the previous
+snippets and reflects ways in which the ``Variable`` object needs to
+grow. At this point the analysis results are finished and we are ready
+to save things into result files. First, we will rename the variables
+in the following way::
+
+        obs     .name = "spaceint_of_nbp_over_global"
+        mod     .name = "spaceint_of_nbp_over_global"
+        obs_sum .name = "accumulate_of_nbp_over_global"
+        mod_sum .name = "accumulate_of_nbp_over_global"
+        obs_mean.name = "Period Mean global"
+        mod_mean.name = "Period Mean global"
+        bias    .name = "Bias global"
+        rmse    .name = "RMSE global"
+
+We rename the variables because the ILAMB plotting and HTML generation
+engine is built to recognize certain keywords in the variable name and
+subsequently render the appropriate plots. Since our ``obs`` and
+``mod`` variables represent spatial integrals of ``nbp``, we name them
+with the keyword ``spaceint``. The ``accumulate`` keyword will also
+cause a plot to automatically be generated and placed in the HTML
+output in a predetermined location. This feature makes the
+presentation of results trivial. The scalar quantities are also
+renamed such that their names reflect the table headings of the HTML
+output.
+
+Finally we dump these variables into netCDF4 files. The first file
+corresponds to the current model being analyzed. A netCDF4 dataset is
+opened in the output path with a name built from the confrontation and
+model names, and the analysis results are saved into it. The
+``Variable`` class has support for simply asking that an instance be
+dumped into an open dataset. 
Any dimension information or units are +automatically recorded:: + + results = Dataset("%s/%s_%s.nc" % (self.output_path,self.name,m.name),mode="w") + results.setncatts({"name" :m.name, "color":m.color}) + mod .toNetCDF4(results) + mod_sum .toNetCDF4(results) + mod_mean .toNetCDF4(results) + bias .toNetCDF4(results) + rmse .toNetCDF4(results) + bias_score.toNetCDF4(results) + rmse_score.toNetCDF4(results) + results.close() + +We also write out information from the benchmark dataset as +well. However, since confrontations can be run in parallel, only the +confrontation that is flagged as the master need write this output:: + + if self.master: + results = Dataset("%s/%s_Benchmark.nc" % (self.output_path,self.name),mode="w") + results.setncatts({"name" :"Benchmark", "color":np.asarray([0.5,0.5,0.5])}) + obs .toNetCDF4(results) + obs_sum .toNetCDF4(results) + obs_mean.toNetCDF4(results) + results.close() + +That is it +---------- + +While more involved than simply adding a dataset or model result to +the analysis, that is all we need to implement for our custom +confrontation. As you can see, we managed to encapsulate all of the +relevant code into one file which interfaces seamlessly with the rest +of the ILAMB system. In the case of ``ConfNBP.py``, we have included +it in the main repository for the ILAMB package. However, users may +create their own confrontations and host/maintain them separately for +use with the system. We see this as a first step towards a more +general framework for community-driven benchmarking. diff --git a/doc/custom_regions.rst b/doc/custom_regions.rst new file mode 100644 index 00000000..45ccbc6a --- /dev/null +++ b/doc/custom_regions.rst @@ -0,0 +1,124 @@ +Defining Custom Regions +======================= + +In the `tutorial <./ilamb_run.html>`_ explaining the options of +``ilamb-run``, we highlight that custom regions may be defined in two +ways. The first is region definition by latitude and longitude bounds +which can be done in the form of a text file in the following comma +delimited format:: + + #label,name ,lat_min,lat_max,lon_min,lon_max + usa ,Continental US, 24, 50, -126, -66 + alaska,Alaska , 53, 72, -169, -129 + +The first column is the label to be used, followed by the region +name. Then the minimum and maximum bounds on the latitude and +longitude are specified. Note that longitude values are expected on +the [-180,180] interval. In this current iteration regions cannot be +specified which span the international dateline. + +The second method is by creating a netCDF4 file which will be used +internally to create a mask for each region. This we will demonstrate +by encoding the above regions but in this format. First we create the +spatial grid on which we will define the regions. + +.. code-block:: python + + from netCDF4 import Dataset + import numpy as np + + # Create the lat/lon dimensions + res = 0.5 + latbnd = np.asarray([np.arange(- 90 , 90 ,res), + np.arange(- 90+res, 90+0.01,res)]).T + lonbnd = np.asarray([np.arange(-180 ,180 ,res), + np.arange(-180+res,180+0.01,res)]).T + lat = latbnd.mean(axis=1) + lon = lonbnd.mean(axis=1) + +Next we create an array of integers which we will use to mark the +regions we wish to encode. This is essentially painting by numbers. We +initialize the array to a missing value which we will encode later. + +.. 
code-block:: python + + # Create the number array, initialize to a missing value + miss = -999 + ids = np.ones((lat.size,lon.size),dtype=int)*miss + +Then we paint the regions we wish to encode using the latitude and +longitude bounds which were in the sample text file above. This part +will vary depending on how you wish to define regions. For example, +our regions here will still appear to be defined by latitude and +longitude bounds because that is how we are creating the mask. You may +find other sources for your region definitions which will allow more +precise representations. Note that this method of definition means +that regions cannot overlap in a single file. If you need to define +overlapping regions, put each region in a separate file. + +.. code-block:: python + + # Paint the Continental US with a `0` + ids[np.where(np.outer((lat>= 24)*(lat<= 50), + (lon>=-126)*(lon<=- 66)))] = 0 + + # Paint Alaska with a `1` + ids[np.where(np.outer((lat>= 53)*(lat<= 72), + (lon>=-169)*(lon<=-129)))] = 1 + +Next we convert the ``numpy`` integer array to a masked array where we +mask by the missing value we defined above. Then we create an array of +labels to use as indentifiers for the integer numbers we defined. A +``0`` in the ``ids`` array will correspond to the ``USA`` region and a +``1`` to the ``Alaska`` region. These lower case version of these +names will be used as region labels. + +.. code-block:: python + + # Convert the ids to a masked array + ids = np.ma.masked_values(ids,miss) + + # Create the array of labels + lbl = np.asarray(["USA","Alaska"]) + +Finally we encode the netCDF4 dataset. There are a few important +details in this code. The first is to use the ``numpy`` datatypes of +the arrays when creating netCDF4 variables. This is especially +important in encoding the ``labels`` array as it will ensure the +string array is created properly. The other important detail is to +encode the ``labels`` attribute of the ``I`` variable. This is what +tells the ILAMB system where to find the labels for the integers +defined in the array. + +.. code-block:: python + + # Create netCDF dimensions + dset = Dataset("regions.nc",mode="w") + dset.createDimension("lat" ,size=lat.size) + dset.createDimension("lon" ,size=lon.size) + dset.createDimension("nb" ,size=2 ) + dset.createDimension("n" ,size=lbl.size) + + # Create netCDF variables + X = dset.createVariable("lat" ,lat.dtype,("lat" )) + XB = dset.createVariable("lat_bounds" ,lat.dtype,("lat","nb" )) + Y = dset.createVariable("lon" ,lon.dtype,("lon" )) + YB = dset.createVariable("lon_bounds" ,lon.dtype,("lon","nb" )) + I = dset.createVariable("ids" ,ids.dtype,("lat","lon")) + L = dset.createVariable("labels" ,lbl.dtype,("n" )) + + # Load data and encode attributes + X [...] = lat + X.units = "degrees_north" + XB[...] = latbnd + + Y [...] = lon + Y.units = "degrees_east" + YB[...] = lonbnd + + I[...] = ids + I.labels= "labels" + + L[...] = lbl + + dset.close() diff --git a/doc/first_steps.rst b/doc/first_steps.rst new file mode 100644 index 00000000..3f5c1f7b --- /dev/null +++ b/doc/first_steps.rst @@ -0,0 +1,233 @@ +First Steps +=========== + +This software comes with no data. It is meant to be generic software +which facilitates the automatic confrontation of model results with +benchmark observational datasets. However, the best way to learn how +to use this software is with actual data. To this end we have a +relatively small sample which you can `download +`_. 
Extract +this file to a location of your choosing by the following:: + + tar -xvf minimal_ILAMB_data.tgz + cd ILAMB_sample + export ILAMB_ROOT=$PWD + +We use this environment variable in the ILAMB package to point to the +top level directory of the data. Later, when we reference specific +data locations, we can specify them relative to this path. This both +shortens the path and makes the configuration portable to other +systems or data locations. + +The following tree represents the organization of the contents of this +sample data:: + + ILAMB_sample/ + ├── DATA + │ ├── albedo + │ │ └── CERES + │ │ └── albedo_0.5x0.5.nc + │ └── rsus + │ └── CERES + │ └── rsus_0.5x0.5.nc + └── MODELS + └── CLM40cn + ├── rsds + │ └── rsds_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + └── rsus + └── rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc + +There are two main branches in this directory. The first is the +``DATA`` directory--this is where we keep the observational datasets +each in a subdirectory bearing the name of the variable. While not +strictly necesary to follow this form, it is a convenient +convention. The second branch is the ``MODEL`` directory in which we +see a single model result from CLM. + +Configure Files +--------------- + +Now that we have data, we need to setup a file which the ILAMB package +will use to initiate a benchmark study. There is such a file which +comes with the software package in the ``demo`` directory called +``sample.cfg``. Navigate to the demo directory and open this file or view it `online +`_. We also reproduce it here for the purpose of this tutorial:: + + # This configure file specifies the variables + + [h1: Radiation and Energy Cycle] + bgcolor = "#FFECE6" + + [h2: Surface Upward SW Radiation] + variable = "rsus" + + [CERES] + source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" + + [h2: Albedo] + variable = "albedo" + derived = "rsus/rsds" + + [CERES] + source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" + +We note that while the ILAMB package is written in python, this file +contains no python and is written in a small configure language of our +invention. Here we will go over this file line by line and explain how +each entry functions. + +At the top of the file, you see the following lines:: + + [h1: Radiation and Energy Cycle] + bgcolor = "#FFECE6" + +This is a tag that we use to tell the system that we will have a top +level heading ``h1`` which we call *Radiation and Energy Cycle*. While +you can name this section anything of your choosing, we have chosen +this name as it is descriptive of the benchmarking activities we will +perform. Also note that you may specify a background color here in +hexadecimal format (we found this site to be helpful to play around +with `colors `_). This color will +be used in the output which we will show later. It is important to +understand that heading are hierarchical--this heading owns everything +underneath it until the next ``h1`` tag is found or the file ends. We +use ``h1`` level headings to group variables of a given type to better +organize the output. + +Below this, you will notice a second level heading which appears like +this:: + + [h2: Surface Upward SW Radiation] + variable = "rsus" + +We will be looking at radiation here. The ``variable`` tag is the name +of the variable inside the dataset which represents the variable of +interest. Here ``rsus`` is a standard name used to represent +*Surface Upward Shortwave Radiation*. We use ``h2`` headings to +represent a variable which we wish to compare. 
+ +The next entry in the file appears as the following:: + + [CERES] + source = "DATA/rsus/CERES/rsus_0.5x0.5.nc" + +First, notice the absence of a ``h1`` or ``h2`` tag. This indicates +that this entry is a particular dataset of a given variable (our +``h2`` heading) of a given grouping (our ``h1`` heading). We have +named it CERES as that is the name of the data source we have +included. We only have to specify the location of the source dataset, +relative to the environment variable we set earlier, ``ILAMB_ROOT``. + +At this point we feel it important to mention that this is the minimum +required to setup a benchmark study in this system. If you have an +observational dataset which directly maps to a variable which is +output by models as ``rsus`` is, you are done. + +However, it is possible that your dataset has no direct analog in the +list of variables which models output and some manipulation is +needed. We have support for when your dataset corresponds to an +algebraic function of model variables. Consider the remaining entries +in our sample:: + + [h2: Albedo] + variable = "albedo" + derived = "rsus/rsds" + + [CERES] + source = "DATA/albedo/CERES/albedo_0.5x0.5.nc" + +We have done two things here. First we started a new ``h2`` heading +because we will now look at albedo. But albedo is not a variable which +is included in our list of model outputs (see the tree above). However +we have both upward and downward radiation, so we could compute +albedo. This is accomplished by adding the ``derived`` tag and +specifying the algebraic relationship. When our ILAMB system looks for +the albedo variable for a given model and cannot find it, it will try +to find the variables which are the arguments of the expression you +type in the ``derived`` tag. It will then combined them automatically +and resolve unit differences. + +The configuration language is small, but allows you to change a lot of +the behavior of the system. The full functionality is documented `here +`_. Non-algebraic manipulations are also possible, but +will be covered in a more advanced tutorial. + +Running the Study +----------------- + +Now that we have the configuration file set up, you can run the study +using the ``ilamb-run`` script. Executing the command:: + + ilamb-run --config sample.cfg --model_root $ILAMB_ROOT/MODELS/ --regions global + +If you are on some institutional resource, you may need to launch the +above command using a submission script, or request an interactive +node. As the script runs, it will yield output which resembles the +following:: + + Searching for model results in /Users/ncf/sandbox/ILAMB_sample/MODELS/ + + CLM40cn + + Parsing config file sample.cfg... + + SurfaceUpwardSWRadiation/CERES Initialized + Albedo/CERES Initialized + + Running model-confrontation pairs... + + SurfaceUpwardSWRadiation/CERES CLM40cn Completed 37.3 s + Albedo/CERES CLM40cn Completed 44.7 s + + Finishing post-processing which requires collectives... + + SurfaceUpwardSWRadiation/CERES CLM40cn Completed 3.3 s + Albedo/CERES CLM40cn Completed 3.3 s + + Completed in 91.8 s + +What happened here? First, the script looks for model results in the +directory you specified in the ``--model_root`` option. It will treat +each subdirectory of the specified directory as a separate model +result. Here since we only have one such directory, ``CLM40cn``, it +found that and set it up as a model in the system. Next it parsed the +configure file we examined earlier. 
We see that it found the CERES +data source for both variables as we specified it. If the source data +was not found or some other problem was encountered, the green +``Initialized`` will appear as red text which explains what the +problem was (most likely ``MisplacedData``). If you encounter this +error, make sure that ``ILAMB_ROOT`` is set correctly and that the +data really is in the paths you specified in the configure file. + +Next we ran all model-confrontation pairs. In our parlance, a +*confrontation* is a benchmark observational dataset and its +accompanying analsys. We have two confrontations specified in our +configure file and one model, so we have two entries here. If the +analysis completed without error, you will see a green ``Completed`` +text appear along with the runtime. Here we see that ``albedo`` took a +few seconds longer than ``rsus``, presumably because we had the +additional burden of reading in two datasets and combining them. + +The next stage is the post-processing. This is done as a separate loop +to exploit some parallelism. All the work in a model-confrontation +pair is purely local to the pair. Yet plotting results on the same +scale implies that we know the maxmimum and minimum values from all +models and thus requires the communcation of this information. Here, +as we are plotting only over the globe and not extra regions, the +plotting occurs quickly. + +Viewing the Output +------------------ + +The whole process generated a new directory of results in the demo +dorectory called ``_build``. To browse the results, open the +``_build/index.html`` file in any browser and you will see a webpage +with a summary image in the center. As we have so few variables and +models, this image will not make much sense at this point. Instead, +click the middle tab called ``Results Table``. From here you will see +both variables which we compared. Clicking on eithe will expand the +row to show the data sources which were part of the study. If you +further click on the CERES link in any row, it will take you to the +plots and tabulated information from the study. + + diff --git a/doc/format_data.rst b/doc/format_data.rst new file mode 100644 index 00000000..0a3e3043 --- /dev/null +++ b/doc/format_data.rst @@ -0,0 +1,220 @@ +Formatting a Benchmark Dataset +============================== + +The ILAMB system is designed to accept files in the form of netCDF4 +datasets which follow the `CF Conventions +`_. These conventions define metadata that +provide a definitive description of what the data in each variable +represents, and the spatial and temporal properties of the data. This +enables ILAMB to decide how to create a commensurate quantity from a +model's output results. + +While it is sufficient to follow the CF conventions when building your +observational dataset, ILAMB does not rigidly require full adherence +to this standard. That is to say, it is only necessary to have some of +the required fields and attributes. In this tutorial we will +demonstrate encoding a few demonstration files using python. However, +the samples only demonstrate what is needed for ILAMB to function and +can be replicated using other tools (i.e. Matlab, NCL). + +Globally gridded data +--------------------- + +In this sample we will create a random variable representing monthly +mean values from 1950-1960 on a 2 degree global grid. First we open a +dataset for writing and then create the time dimension data. + +.. 
code-block:: python + + from netCDF4 import Dataset + import numpy as np + + # Open a dataset for writing + dset = Dataset("global_sample.nc",mode="w") + + # Create temporal dimension + nyears = 10 + month_bnd = np.asarray([0,31,59,90,120,151,181,212,243,273,304,334,365],dtype=float) + tbnd = np.asarray([((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[:-1]).flatten(), + ((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[+1:]).flatten()]).T + tbnd += (1950-1850)*365 + t = tbnd.mean(axis=1) + +While the ``numpy`` portion of this code may be confusing, in concept +we are creating a ``tbnd`` array with a shape ``(120,2)`` which +contains the beginning and ending day of each month from 1950 +to 1960. Subsequently we compute a time array ``t`` of shape ``(120)`` +as the mean value between each of these bounds. + +Encoding the bounds of the time dimension is an important part of +creating the dataset for ILAMB. Many modeling centers have different +conventions as to where a given ``t`` is reported relative to the +interval ``tbnd``. By specifying the time bounds, ILAMB can precisely +match model output to the correct time interval. + +Consider encoding the time dimension even if your data is only +spatial. Many times the observational data we have may be a sparse +collection of points across a decade of observations. We mean to +compare these observations to a mean of the model result across some +time span. In this case, you can build a ``tbnd`` array of shape +``(1,2)`` where the bounds defines the span across which it is +appropriate to compare models. When ILAMB reads in this dataset, it +will detect a mistmatch is the temporal resolution of the model output +and your observational dataset and automatically coarsen the model +output across the specified time bounds. + +Now we move on to the spatial grid and the data itself. + +.. code-block:: python + + # Create spatial dimension + res = 2. + latbnd = np.asarray([np.arange(- 90 , 90 ,res), + np.arange(- 90+res, 90+0.01,res)]).T + lonbnd = np.asarray([np.arange(-180 ,180 ,res), + np.arange(-180+res,180+0.01,res)]).T + lat = latbnd.mean(axis=1) + lon = lonbnd.mean(axis=1) + + # Create some fake data + data = np.ma.masked_array(np.random.rand(t.size,lat.size,lon.size)) + +In this case we again use ``numpy`` to create bounding arrays for the +latitude and longitude. As with the temporal dimension, this is +preferred as it removes ambiguity and improves the accuracy which +ILAMB can deliver. The fake data here is just full of random numbers +in this case with no mask. Normally this data would come from some +other source. This is typically the most time consuming part of the +dataset creation process as data providers seldom provide their +datasets in netCDF format. + +Once you have all the information in memory, then we turn to encoding +the netCDF4 file. First we create all the dimensions and variables we +will use. For more information on these functions, consult the +`netcdf4-python `_ +documentation. + +.. 
code-block:: python + + # Create netCDF dimensions + dset.createDimension("time",size= t.size) + dset.createDimension("lat" ,size=lat.size) + dset.createDimension("lon" ,size=lon.size) + dset.createDimension("nb" ,size=2 ) + + # Create netCDF variables + T = dset.createVariable("time" ,t.dtype ,("time" )) + TB = dset.createVariable("time_bounds",t.dtype ,("time","nb")) + X = dset.createVariable("lat" ,lat.dtype ,("lat" )) + XB = dset.createVariable("lat_bounds" ,lat.dtype ,("lat","nb" )) + Y = dset.createVariable("lon" ,lon.dtype ,("lon" )) + YB = dset.createVariable("lon_bounds" ,lon.dtype ,("lon","nb" )) + D = dset.createVariable("var" ,data.dtype,("time","lat","lon")) + +Finally we load the netCDF4 Variables (``T,TB,X,XB,Y,YB,D``) with the +corresponding numerical values (``t,tbnd,lat,latbnd,lon,lonbnd,data``) +we computed in previous steps. We also encode a few attributes which +ILAMB will need as a bare minimum to correctly interpret the +values. Any units provided will need to adhere to the CF convention, see +`here +`_. + +.. code-block:: python + + # Load data and encode attributes + T [...] = t + T.units = "days since 1850-01-01" + T.calendar = "noleap" + T.bounds = "time_bounds" + TB[...] = tbnd + + X [...] = lat + X.units = "degrees_north" + XB[...] = latbnd + + Y [...] = lon + Y.units = "degrees_east" + YB[...] = lonbnd + + D[...] = data + D.units = "kg m-2 s-1" + dset.close() + +Site data +--------- + +Encoding data from a site or collection of sites is similar with two +main distinctions. First, there is a ``data`` dimension referring to +the number of sites in the set. The latitude and longitude arrays are +of this dimension. Second, the time array must span the maximum +coverage of the site collection. Consider a sample set here consisting +of two sites: site A which has monthly mean data from 1950 and site B +with monthly mean data from 1951. One thing to emphasize is that while +not part of the units description, these times need to be in UTC +format. This can be problematic as sites tend to store their data in a +local time coordinate. The time portion of our script is similar. + +.. code-block:: python + + from netCDF4 import Dataset + import numpy as np + + # Open a dataset for writing + dset = Dataset("global_sample.nc",mode="w") + + # Create temporal dimension + nyears = 2 + month_bnd = np.asarray([0,31,59,90,120,151,181,212,243,273,304,334,365],dtype=float) + tbnd = np.asarray([((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[:-1]).flatten(), + ((np.arange(nyears)*365)[:,np.newaxis]+month_bnd[+1:]).flatten()]).T + tbnd += (1950-1850)*365 + t = tbnd.mean(axis=1) + +However the spatial portion just consists of two locations and +contains no bounds. The data array is then a 2D array where the first +dimension is the total number of time intervals represented and the +second dimension is the number of sites. The data array itself needs +to be masked over regions where each site contains no data. ILAMB will +apply this mask to the model results which it extracts. + +.. code-block:: python + + lat = np.asarray([- 35.655,-25.0197]) + lon = np.asarray([ 148.152, 31.4969]) + + data = np.ma.masked_array(np.zeros((t.size,2)),mask=True) # masked array of zeros + data[:12,0] = np.random.rand(12) # site A's random data + data[12:,1] = np.random.rand(12) # site B's random data + +As before this is the step that is the most complicated as it involves parsing text files into this format. Finally we output again the dimensions and variables to the netCDF4 file. + +.. 
code-block:: python + + # Create netCDF dimensions + dset.createDimension("time",size=t.size) + dset.createDimension("data",size=2 ) + dset.createDimension("nb" ,size=2 ) + + # Create netCDF variables + T = dset.createVariable("time" ,t.dtype ,("time" )) + TB = dset.createVariable("time_bounds",t.dtype ,("time","nb" )) + X = dset.createVariable("lat" ,lat.dtype ,("data" )) + Y = dset.createVariable("lon" ,lon.dtype ,("data" )) + D = dset.createVariable("var" ,data.dtype,("time","data")) + + # Load data and encode attributes + T [...] = t + T.units = "days since 1850-01-01" + T.calendar = "noleap" + T.bounds = "time_bounds" + TB[...] = tbnd + + X [...] = lat + X.units = "degrees_north" + + Y [...] = lon + Y.units = "degrees_east" + + D[...] = data + D.units = "kg m-2 s-1" + dset.close() diff --git a/doc/ilamb_doctor.rst b/doc/ilamb_doctor.rst new file mode 100644 index 00000000..97d64d13 --- /dev/null +++ b/doc/ilamb_doctor.rst @@ -0,0 +1,61 @@ +Diagnosing Missing Model Values with ``ilamb-doctor`` +===================================================== + +In a previous `tutorial <./add_model.html>`_ we covered how a model +can be added to the analysis. However, it can be challenging to know +which variables are needed. To this end we have created a tool called +``ilamb-doctor`` which tries to diagnose what is incorrect or missing +from a given analysis. It takes options similar to ``ilamb-run`` and +is used in the following way:: + + [ILAMB/test]$ ilamb-doctor --config test.cfg --model_root ${ILAMB_ROOT}/MODELS/CLM + + Searching for model results in /Users/ncf/ILAMB//MODELS/CLM + + CLM40n16r228 + CLM45n16r228 + CLM50n18r229 + + We will now look in each model for the variables in the ILAMB + configure file you specified (test.cfg). The color green is used to reflect + which variables were found in the model. The color red is used to + reflect that a model is missing a required variable. + + Biomass/GlobalCarbon CLM40n16r228 biomass or cVeg + GrossPrimaryProductivity/Fluxnet CLM40n16r228 gpp + GrossPrimaryProductivity/GBAF CLM40n16r228 gpp + GlobalNetEcosystemCarbonBalance/Hoffman CLM40n16r228 nbp + NetEcosystemExchange/GBAF CLM40n16r228 gpp, rh, and ra + TerrestrialWaterStorageAnomaly/GRACE CLM40n16r228 tws + Albedo/MODIS CLM40n16r228 rsus and rsds + SurfaceAirTemperature/CRU CLM40n16r228 tas + Precipitation/GPCP2 CLM40n16r228 pr + Biomass/GlobalCarbon CLM45n16r228 biomass or cVeg + GrossPrimaryProductivity/Fluxnet CLM45n16r228 gpp + GrossPrimaryProductivity/GBAF CLM45n16r228 gpp + GlobalNetEcosystemCarbonBalance/Hoffman CLM45n16r228 nbp + NetEcosystemExchange/GBAF CLM45n16r228 gpp, rh, and ra + TerrestrialWaterStorageAnomaly/GRACE CLM45n16r228 tws + Albedo/MODIS CLM45n16r228 rsus and rsds + SurfaceAirTemperature/CRU CLM45n16r228 tas + Precipitation/GPCP2 CLM45n16r228 pr + Biomass/GlobalCarbon CLM50n18r229 biomass or cVeg + GrossPrimaryProductivity/Fluxnet CLM50n18r229 gpp + GrossPrimaryProductivity/GBAF CLM50n18r229 gpp + GlobalNetEcosystemCarbonBalance/Hoffman CLM50n18r229 nbp + NetEcosystemExchange/GBAF CLM50n18r229 gpp, rh, and ra + TerrestrialWaterStorageAnomaly/GRACE CLM50n18r229 tws + Albedo/MODIS CLM50n18r229 rsus and rsds + SurfaceAirTemperature/CRU CLM50n18r229 tas + Precipitation/GPCP2 CLM50n18r229 pr + +Here we have run the command on some inputs in our ``test`` +directory. You will see a list of the confrontations we run and the +variables which are required or their synonyms. 
What is missing in +this tutorial is the text coloring which will indicate if a given +model has the required variables. + +In the future we will add more intelligence to this tool to help +diagnose potential problems. + + diff --git a/doc/ilamb_fetch.rst b/doc/ilamb_fetch.rst new file mode 100644 index 00000000..8500eb07 --- /dev/null +++ b/doc/ilamb_fetch.rst @@ -0,0 +1,37 @@ +Obtaining the ILAMB Data with ``ilamb-fetch`` +============================================= + +In previous tutorials we provided links to download a small dataset +for the purposes of demonstration. However we have another mechanism +for downloading the observational datasets which ILAMB needs. From a +commandline prompt, run ``ilamb-fetch``. You should see output similar +to the following:: + + Comparing remote location: + + http://ilamb.ornl.gov/ILAMB-Data/ + + To local location: + + ./ + + I found the following files which are missing, out of date, or corrupt: + + .//DATA/twsa/GRACE/twsa_0.5x0.5.nc + .//DATA/rlus/CERES/rlus_0.5x0.5.nc + ... + + Download replacements? [y/n] + +This tool looks at a remote location (by default the location of the +land datasets) and compares it to a local location (by defult +``ILAMB_ROOT`` or ``./``). It detects for the presence and version of +the data on your local machine and populates a list for download. The +tool will then prompt you to rerun to check for file validity. + +This tool can be used to download other data collections as well. If +you need the ocean IOMB data, then you can change the remote location +by running:: + + ilamb-fetch --remote_root http://ilamb.ornl.gov/IOMB-Data/ + diff --git a/doc/ilamb_run.rst b/doc/ilamb_run.rst new file mode 100644 index 00000000..7776126d --- /dev/null +++ b/doc/ilamb_run.rst @@ -0,0 +1,177 @@ +Controlling the Analysis with ``ilamb-run`` Options +=================================================== + +While the basic operation of ``ilamb-run`` is covered in previous +tutorials, there are more options which can be used to control what +happens in a run. In this tutorial we will describe these options and +motivate when they are useful. + +Limiting the analysis +--------------------- + +The configure file specifies the confrontations which will be +performed. However, for many reasons in the development and debugging +process it may be advantageous to run ILAMB on only a subset of the +configure file. You can control this by specifying strings which must +be in the confrontation longname. For example, consider the following +arguments:: + + ilamb-run --config ilamb.cfg --model_root ${ILAMB_ROOT}/MODELS --confrontation CERES + +This line will run only the CERES confrontations found in the +``ilamb.cfg``. The same can be achieved for models with the +``--models`` option. In this case, you must specify the model names +which you wish to run against in a given run. + +To see how this is useful, imagine you have 3 models in your +``--model_root`` directory, but for one you needed to replace all the +model result files. So you need to rerun ILAMB, but not on all three +models. With this option, you can rerun ILAMB on just the needed +model, saving time. If this model name is ``modelC``, then the option +would be:: + + ilamb-run --config ilamb.cfg --model_root ${ILAMB_ROOT}/MODELS --models modelC --clean + +The ``--clean`` option here tells ILAMB to rerun the analysis even if +intermediate files are present, essentially cleaning out the +``modelC`` ILAMB contents and recomputing them, while leaving the rest +of the models untouched. 
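In principle these options can be combined. For example, to redo only the CERES confrontations for ``modelC`` while leaving every other result untouched, an invocation along these lines should work::

    ilamb-run --config ilamb.cfg --model_root ${ILAMB_ROOT}/MODELS --confrontation CERES --models modelC --clean
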
+ +Defining models +--------------- + +There are two ways to define models for an analysis. The first was +covered in previous tutorials. The ``--model_root`` option is used to +specify a location whose subdirectories (not recursive) are +initialized as separate models. This is the recommended method as it +is simple and fast. + +One issue that comes up is that model developers want to run ILAMB, +but during the development process as a sanity check on the model +performance. This means that model results may not be available in the +contemporary period. The ``--model_year`` option can be used to shift +the time of all models in an analysis by a fixed number of years. So +if a model run starts in 1850, but you wish to treat these results as +if they were the year 2000 (for comparing to some dataset), then the +appropriate command is ``--model_year 1850 2000``. + +While helplful, we also acknowledge that globally applying a shift in +time across all models might not be desirable. It is likely that a +user has cached the results from previous versions of the model which +have been spun up and run over the contemporary period. To this end we +provide a more detailed model setup option ``--model_setup +file.txt``. The contents of ``file.txt`` could look something like the +following:: + + # Model Name, Location of Files , Shift From, Shift To + CLM40 , ABSOLUTE/PATH/TO/CLM40 + CLM45 , ABSOLUTE/PATH/TO/CLM45 + CLM5X , ABSOLUTE/PATH/TO/CLM5X, 1850 , 2000 + CLM5Y , ABSOLUTE/PATH/TO/CLM5Y, 1850 , 2000 + +The text file is in simple comma delimited form with either 2 or 4 +columns. Lines which begin with ``#`` will be ignored. The first +column is the name which you wish to assign to the model and the +second is the absolute path of the results. The third and fourth +columns define the shift in years for each model. If there are only +two columns of data, we will not apply a shift. + +To add some context, this option may be useful in the model +development process. In our sample setup, we have two model versions +CLM4 and CLM4.5 whose results are archived and will not be changing +and thus do not need time shifted. We have setup two versions of CLM5, +X and Y which represent perhaps different parameterization choices, +shifted because we have not spun these models up. The ILAMB results +should be interpretted carefully, but comparing two parameterizations +in this way might provide insight into key differences. + +Regions +------- + +The ILAMB analysis can be performed on an arbitrary number of regions +which may be defined in many ways. The ILAMB package comes with a set +of these regions predefined which are used in the `Global Fire +Emissions Database `_. They are: + + * bona, Boreal North America + * tena, Temperate North America + * ceam, Central America + * nhsa, Northern Hemisphere South America + * shsa, Southern Hemisphere South America + * euro, Europe + * mide, Middle East + * nhaf, Northern Hemisphere Africa + * shaf, Southern Hemisphere Africa + * boas, Boreal Asia + * ceas, Central Asia + * seas, Southeast Asia + * eqas, Equatorial Asia + * aust, Australia + +The first entry in the above list is a region label. To avoid +confusion these should not have spaces or special characters. The +second entry is the name itself which will appear in the pull down +menus on the webpage otput. To run the ilamb analysis over particular +regions, use the ``--regions`` option and include the region labels +delimited by spaces. 
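For example, to perform the analysis over the globe as well as the two African regions from the list above, the command might look like::

    ilamb-run --config ilamb.cfg --model_root ${ILAMB_ROOT}/MODELS --regions global nhaf shaf
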
+ +As we anticipate that users will desire to define their own regions, +we have provided this capability in two forms. The first is region +definition by latitude and longitude bounds which can be done in the +form of a text file in the following comma delimited format:: + + #label,name ,lat_min,lat_max,lon_min,lon_max + usa,Continental US, 24, 50, -126, -66 + +Additional rows in the same format may be included to define more +regions in the same file. The first column is the label to be used, +followed by the region name. Then the minimum and maximum bounds on +the latitude and longitude are specified. Note that longitude values +are expected on the [-180,180] interval. In this current iteration +regions cannot be specified which span the international dateline. + +The second form is by creating a mask using a netCDF4 file. We will go +into more detail about the format of the netCDF4 file for defining +masks in its own `tutorial <./custom_regions.html>`_. So if the sample +text file above is called ``regions.txt`` and we have a netCDF4 file +called ``amazon.nc`` with a region label ``amazon``, then the ILAMB +analysis can be performed over additional regions by specifying:: + + --define_regions regions.txt amazon.nc --regions global usa amazon + +In its current form, ILAMB expects that the analysis will be performed +over at least the global region. All overall scores are based on +information in that region. This is a restriction we are working to +loosen. If you need to circumvent this, you can redefine the region +labeled ``global`` to meet your needs. + + +Other options +------------- + +* ``--filter``, Sometimes a model has output from several runs or + experiments included in the same location. This is frequently + indicated by some string in the filename, such as ``r0i0p0``. This + option may be used to require that files contain a specific string + to be considered in the list of variables models provide. +* ``--skip_plots``, The plotting phase of ILAMB is expensive. It + takes a long time to generate all the thousands of graphics that get + produced. It may be that you are running ILAMB for the summary + graphic/information only. In this case you can run with this option + to speed up the run. +* ``--build_dir``, The default location for generating the ILAMB + output is a ``_build`` directory placed in the directory from which + you ran ``ilamb-run``. While fine for everyday use, you may wish to + control the location of this directory. +* ``--disable_logging``, ILAMB uses a MPI logger to write exceptions + and progress to a log file in a thread-lock fashion. This helps + tremendously when tracking down user errors. However, we have found + that on some systems (e.g. geysey at NCAR) this causes ``ilamb-run`` to + lock for reasons we do not yet understand. Disabling the logging + seems to circumvent the issue. If you find that ``ilamb-run`` makes + no progress when running in parallel, you might try this option. +* ``--quiet``, By default, ILAMB spits out progress information to + the screen. If you wish to supress this information, run with this + option. 
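Several of the options above are often used together. As a sketch, a quick pass that skips the plotting phase, writes its output to a directory of our choosing (the path ``./ilamb_test`` is only illustrative), and suppresses the progress output might be launched as::

    ilamb-run --config ilamb.cfg --model_root ${ILAMB_ROOT}/MODELS --regions global --skip_plots --build_dir ./ilamb_test --quiet
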
+ + diff --git a/doc/index.rst b/doc/index.rst new file mode 100644 index 00000000..3ad2fddb --- /dev/null +++ b/doc/index.rst @@ -0,0 +1,39 @@ +The ILAMB Benchmarking System +============================= + +The International Land Model Benchmarking (ILAMB_) project is a +model-data intercomparison and integration project designed to improve +the performance of land models and, in parallel, improve the design of +new measurement campaigns to reduce uncertainties associated with key +land surface processes. Building upon past model evaluation studies, +the goals of ILAMB are to: + +* develop internationally accepted benchmarks for land model + performance, +* promote the use of these benchmarks by the international community + for model intercomparison, +* strengthen linkages between experimental, remote sensing, and + climate modeling communities in the design of new model tests and + new measurement programs, and +* support the design and development of a new, open source, + benchmarking software system for use by the international community. + +It is the last of these goals to which this page is concerned. We have +developed a python-based generic benchmarking system, for which the +source code may be found on bitbucket_. The development is open and +patches are welcome. The main output of our package comes in the form +of a HTML site_ which can be navigated to explore and understand the +results. + +Documentation +------------- + +.. toctree:: + :maxdepth: 2 + + tutorial + packageapi + +.. _ILAMB: http://www.ilamb.org/ +.. _bitbucket: https://bitbucket.org/ncollier/ilamb +.. _site: http://www.climatemodeling.org/~nate/ILAMB/index.html diff --git a/doc/install.rst b/doc/install.rst new file mode 100644 index 00000000..84a85043 --- /dev/null +++ b/doc/install.rst @@ -0,0 +1,278 @@ +Installation +============ + +The ILAMB benchmarking software is written in python 2.7x and depends +on a few packages which extend the language's usefulness in scientific +applications. The easiest way to install the ILAMB package and its +dependencies is to get them from the Python Package Index (pypi_) using +pip_. To do so, type:: + + pip install ILAMB --user + +at the commandline and pip_ will install most everything +automatically. Please note that I have post-pended a ``--user`` flag +to the command. This is not strictly necessary yet recommended as it +will cause the packages to be installed to a *local* directory in +place of the *system* directory. This allows packages to be installed +without administrator privileges, and leaves your system installation +untouched, which may be important if you need to revert to a previous +state. You should see that a number of packages in addition to ILAMB +had their versions checked or were upgraded/installed as needed. These +include: + +* numpy_, the fundamental package for scientific computing with python +* matplotlib_, a 2D plotting library which produces publication quality figures +* netCDF4_, a python/numpy interface to the netCDF C library (you must have the C library installed) +* sympy_, a python library for symbolic mathematics +* mpi4py_, a python wrapper around the MPI library (you must have a MPI implementation installed) +* cf_units_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) + +I have designated that a few of these dependencies are python +interfaces to C libraries and so the library must also be installed +separately. See the individual package websites for more +details. 
Ideally, pip_ would be able to install all our dependencies +automatically. + +Unfortunately, one of our dependencies must be installed +manually. Despite being listed in the Python Package Index, basemap_ +cannot be installed with pip_. The meta information is listed, but the +package source is too large to be hosted and so installation fails. We +will need to install basemap_ from the source hosted on github_. This +is a useful process to understand as any python package can be +installed in this way. First, clone the git repository:: + + git clone https://github.com/matplotlib/basemap.git + +This will take some time as the repository is large (>100Mb) due to it +including some high resolution map data used in plotting. Enter into +the cloned directory and take note of a file called ``setup.py``. All +python packages will contain a file called ``setup.py`` in the top +level directory. This is where a developer tells python how to install +the package. Now we type:: + + python setup.py install --user + +and the package should install. Hopefully in the future basemap_ will +improve their installation process in pypi_, but in the meantime it +must be installed as we have detailed here. + +You can test your installation by the following command:: + + python -c "import ILAMB; print ILAMB.__version__" + +If you get a numerical output, then the package has been successfully +installed. + +Now what? +--------- + +If you got the installation to work, then you should proceed to +working on the next tutorial. Before leaving this page, there are a +few extra steps we recommend you perform. If you installed ILAMB using +the ``--user`` option, the executeable script ``ilamb-run`` will be +placed inside ``${HOME}/.local/bin``. You may need to postpend this +location to your ``PATH`` environment variable:: + + export PATH=${PATH}:${HOME}/.local/bin + +assuming you are using a ``bash`` environment. This will make the +``ilamb-run`` script executeable from any directory. Also, if you are +connecting to a machine remotely in order to run ILAMB, you may wish +to change the matplotlib_ backend to something that does not generate +interactive graphics:: + + export MPLBACKEND=Agg + +This will allow ILAMB to run without needing to connect with the +``-X`` option. + +What can go wrong? +------------------ + +In an ideal world, this will work just as I have typed it to +you. However, if you are here, something has happened and you need +help. Installing software is frequently all about making sure things +get put in the correct place. You may be unaware of it, but you may +have several versions of python floating around your machine. The pip_ +software we used to install packages needs to match the version of +python that we are using. Try typing:: + + pip --version + which python + python --version + +where you should see something like:: + + pip 9.0.1 from /usr/local/lib/python2.7/site-packages (python 2.7) + /usr/local/bin/python + Python 2.7.13 + +Notice that in my case the pip_ I am using matches the version and +location of the python. This is important as pip_ will install +packages into the locations which my python will find. If your pip_ +is, say, for python 3 but you are using python 2.7 then you will +install packages successfully, but they will seem to not be available +to you. The same thing can happen if you have the right version of +python, but it is installed in some other location. + +Now we provide some interpretation of the possible output you got from +the test. 
If you ran:: + + python -c "import ILAMB; print ILAMB.__version__" + +and you see something like:: + + Traceback (most recent call last): + File "", line 1, in + ImportError: No module named ILAMB + +Then the package did not correctly install and you need to look at the +screen output from the install process to see what went wrong. You may +also have observed an import error of a different sort. When you +import the ILAMB package, we check the version of all the packages on +which we depend. You could see an error text like the following:: + + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/lib/python2.7/site-packages/ILAMB/__init__.py", line 29, in + (key,__version__,key,requires[key],pkg.__version__)) + ImportError: Bad numpy version: ILAMB 0.1 requires numpy >= 1.9.2 got 1.7 + +This means that while the ``numpy`` package is installed on your +system, its version is too old and you need to use pip_ to upgrade it +to at least the version listed. You may also see a message like the +following:: + + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/lib/python2.7/site-packages/ILAMB/__init__.py", line 25, in + pkg = __import__(key) + ImportError: No module named numpy + +This means that we require the ``numpy`` package but you do not have +it installed at all. This should not happen, but if it does, use pip_ +to resolve this problem. It is possible that despite a seemingly +smooth installation of basemap_, ILAMB complains about there not being +a module called basemap:: + + Traceback (most recent call last): + File "", line 1, in + File "/usr/local/lib/python2.7/site-packages/ILAMB/__init__.py", line 24, in + pkg = __import__(key, globals(), locals(), [froms[key]]) + ImportError: No module named basemap + +Basemap is a little trickier than other python packages because it is +a *plugin* to the maplotlib package. My recommendation if you are +seeing this message is to install matplotlib in a local location and +upgrade it to the most up to date version:: + + pip install matplotlib --user --upgrade + +and then install basemap also using the ``--user`` option. This should +ensure that matplotlib toolkits find the basemap extension. + +Institutional machines +---------------------- + +While ILAMB is portable and runs on your laptop or workstation, you +may be working remotely on an institutional machine where you have +modeling output results. Many times these machines already have our +dependencies installed and we only have need to load them using +environment modules. See your computing center usage tutorials for +more information on how these work. Typically, you can search for +available software by:: + + module avail search_term + +for example. And then is loaded by:: + + module load software_name + +In an effort to make it simpler for users to get ILAMB running, we are +listing installation instructions here for a number of machines with +which we have experience. In each case, we have tried to start with +only the default software enabled. Your mileage may vary as the +software stacks at these centers frequently change. + +It is relevant to note that ILAMB uses MPI to parallelize the +benchmarking process. Thus MPI is called even if you are running on +just one process. Because of this, many if not all institutional +machines will then require you to launch a job though a submission +script. See your computing center for details. + +Edison @ NERSC +~~~~~~~~~~~~~~ + +.. 
code-block:: bash + + module load python + module load numpy + module load matplotlib + module load basemap + module load mpi4py + module load netcdf + module load netcdf4-python + module load udunits + pip install ILAMB --user + export PATH=${PATH}:${HOME}/.local/edison/2.7.9/bin/ + +The matplotlib on Edison is pretty old and control of the backend is +not possible using the ``MPLBACKEND`` environment variable. If you +want to run without needing to connect with the ``-X`` option, you +will need to change the backend through the ``matplotlibrc`` +file. First, copy this file from the system level, into your local +configure directory:: + + cp /usr/common/software/python/matplotlib/1.4.3/lib/python2.7/site-packages/matplotlib-1.4.3-py2.7-linux-x86_64.egg/matplotlib/mpl-data/matplotlibrc ${HOME}/.config/matplotlib/ + +Next open the local copy of the file with a editor and search for +``backend`` changing the value to the right of the colon to ``Agg``. + +Rhea @ OLCF +~~~~~~~~~~~ + +.. code-block:: bash + + module rm PE-intel + module load PE-gnu + module load netcdf + module load udunits + module load geos + module load python + module load python_setuptools + module load python_pip + module load python_numpy + module load python_matplotlib + module load python_matplotlib_basemap_toolkit + module load python_netcdf4 + module load python_mpi4py + pip install ILAMB --user + export PATH=${PATH}:${HOME}/.local/bin/ + # The udunits module file should do this but doesn't + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/sw/rhea/udunits/2.1.24/rhel6.6_gnu4.4.7/lib/ + +The matplotlib on Rhea is pretty old and control of the backend is +not possible using the ``MPLBACKEND`` environment variable. If you +want to run without needing to connect with the ``-X`` option, you +will need to change the backend through the ``matplotlibrc`` +file. First, copy this file from the system level, into your local +configure directory:: + + cp /sw/rhea/python_matplotlib/1.4.3/python2.7.9_numpy1.9.2_gnu4.8.2/lib64/python2.7/site-packages/matplotlib-1.4.3-py2.7-linux-x86_64.egg/matplotlib/mpl-data/matplotlibrc ${HOME}/.config/matplotlib/ + +Next open the local copy of the file with a editor and search for +``backend`` changing the value to the right of the colon to ``Agg``. + + + +.. _pypi: https://pypi.python.org/pypi +.. _pip: https://pip.pypa.io/en/stable/ +.. _repository: https://bitbucket.org/ncollier/ilamb +.. _numpy: https://www.numpy.org/ +.. _matplotlib: https://matplotlib.org/ +.. _netCDF4: https://github.com/Unidata/netcdf4-python +.. _cf_units: https://github.com/SciTools/cf-units +.. _basemap: https://github.com/matplotlib/basemap +.. _sympy: https://www.sympy.org/ +.. _mpi4py: https://pythonhosted.org/mpi4py/ +.. _github: https://github.com diff --git a/doc/overview.rst b/doc/overview.rst new file mode 100644 index 00000000..cb913503 --- /dev/null +++ b/doc/overview.rst @@ -0,0 +1,203 @@ +Package Overview +================ + +This tutorial is meant to provide some basic understanding of how the +ILAMB python package works and is organized. The level of information +communicated is aimed at a developer who wants to implement his own +benchmark into the system and needs to understand how to go about +doing so. We will start here with a few simple examples which +demonstrate functionality, and layer in complexity in subsequent +tutorials. + +The ILAMB python package consists of four main objects: ``Variable``, +``ModelResult``, ``Confrontation``, and ``Scoreboard``. We will +discuss the first three in this tutorial. 
+ +The Variable Object +------------------- + +The ``Variable`` object is the basic building block of the ILAMB +package. It keeps track of dimensions as the netCDF variables do, but +also provides data-aware analysis routines which operate on the data +in an intelligent manner. For example, consider the following variable +we can create from the data used in a `previous <./first_steps.html>`_ +tutorial:: + + from ILAMB.Variable import Variable + import os + v = Variable(filename = os.environ["ILAMB_ROOT"] + "/MODELS/CLM40cn/rsus/rsus_Amon_CLM40cn_historical_r1i1p1_185001-201012.nc", + variable_name = "rsus") + +The first two lines here import the functionality we need. The first +imports the ``Variable`` object from the ILAMB package and the second +imports a standard python package which allows us to interact with the +operating system. We need this package to gain access to the +``ILAMB_ROOT`` environment variable explained in the `First Steps +<./first_steps.html>`_ tutorial. Then we create a variable object by +specifying the filename as well as the name of the variable which want +to extract from inside. We can then print this variable:: + + print v + +which will display the following information to the screen:: + + Variable: rsus + -------------- + unit: W m-2 + isTemporal: True (1932) + isSpatial: True (192,288) + nDatasites: N/A + dataShape: (1932, 192, 288) + dataMax: 4.028994e+02 + dataMin: 0.000000e+00 + dataMean: 6.153053e+01 + +The ``Variable`` object understands the dimensionality of the data as +well as its unit and then provides analysis routines which operate +intelligently depending on the type of data present. So for example, +we can find the mean value over the time period of the data by:: + + print v.integrateInTime(mean=True) + +which will display:: + + Variable: rsus_integrated_over_time_and_divided_by_time_period + -------------------------------------------------------------- + unit: W m-2 + isTemporal: False + isSpatial: True (192,288) + nDatasites: N/A + dataShape: (192, 288) + dataMax: 1.386898e+02 + dataMin: 9.787394e+00 + dataMean: 6.148656e+01 + +The returned value is another ``Variable`` object, which now has lost +its temporal dimension because this was integrated out. It represents +the average in time at each grid cell in the original data. The +``Variable`` object has a lot of functionality and will be expanded to +meet needs of developers. For a more complete explanation of the +interface, consult the `documentation +<_generated/ILAMB.Variable.Variable.html>`_. However, the point of +this tutorial is that we use the ``Variable`` object to perform +analysis operations in a uniform and flexible manner. Its full +functionality will be covered in more detail in a future tutorial. + +The ModelResult Object +---------------------- + +The ``ModelResult`` object is meant to make getting a model's +variables easy. We anticipate that researchers will have placed all a +model run's results in a single directory bearing the model name as +well as perhaps version, or forcing. To create this object, we simply +point to the top-level directory where the results are contained:: + + from ILAMB.ModelResult import ModelResult + m = ModelResult(os.environ["ILAMB_ROOT"] + "/MODELS/CLM40cn", + modelname = "CLM40cn") + +When we instantiate the model result, internally we search for all +variables found in all netCDF files contained underneath this +top-level directory. This makes extracting variables simple. 
We can +extract the same variable as above, but in a much more simple manner +once the model result has been defined:: + + v = m.extractTimeSeries("rsus") + print v + +yields the folling screen output:: + + Variable: rsus + -------------- + unit: W m-2 + isTemporal: True (1932) + isSpatial: True (192,288) + nDatasites: N/A + dataShape: (1932, 192, 288) + dataMax: 4.028994e+02 + dataMin: 0.000000e+00 + dataMean: 6.153053e+01 + +In addition to making the aquisition of model data simpler, if land +fractions and areas are relevant (that is, the variable is spatial), +we will apply them to the variable automatically. The user is only +responsible for having the appropriate datafiles (``areacella`` and +``sftlf``) in the model's directory. Extracting the variables from the +``ModelResult`` object ensures that we handle model data +consistently. The ``ModelResult`` `interface +<_generated/ILAMB.ModelResult.ModelResult.html>`_ is much smaller, and +will be expanded in the future. + +The Confrontation Object +------------------------ + +The ``Confrontation`` object manages the benchmark dataset, the +extraction of the data from the model, the anaylsis performed, as well +as the plotting and generating of results. As a developer, you will be +writing your own ``Confrontation`` objects so it is important to +understand what they are and how they work. First, we will initialize +one to help illustrate their functionality:: + + from ILAMB.Confrontation import Confrontation + c = Confrontation(source = os.environ["ILAMB_ROOT"] + "/DATA/rsus/CERES/rsus_0.5x0.5.nc", + name = "CERES", + variable = "rsus") + +As before, we specify the source data relative to the ``ILAMB_ROOT`` +variable. We also have given the confrontation a name and a variable +to expect. There are two main functions to highlight at this +point. The first has to do with preparing data for comparison:: + + obs,mod = c.stageData(m) + +The ``stageData`` functionality returns both the observational and +model datasets as ``Variable`` objects and in a form in which they are +comparable. For example, if we again print ``mod`` here, we is +analagous to ``v`` above, we see:: + + Variable: rsus + -------------- + unit: W/m2 + isTemporal: True (131) + isSpatial: True (192,288) + nDatasites: N/A + dataShape: (131, 192, 288) + dataMax: 4.028824e+02 + dataMin: 0.000000e+00 + dataMean: 6.035579e+01 + +However, the temporal dimension has been greatly reduced (from 1932 +entries down to 131). This is because the observational dataset is +contemporary and the model starts back in 1850. In addition to +clipping the data, we also convert units if appropriate. + +The second main function of the ``Confrontation`` is to perform the +desired analysis. This happens in the ``confront`` functionality:: + + c.confront(m) + +Where ``m`` is the ``ModelResult`` being passed in. This routine calls +``stageData`` internally, and then performs the desired analysis. The +function does not return anything, but generates an analysis file +which contains the results of the analysis. In this case, you will +find two netCDF4 files in your directory: ``CERES_Benchmark.nc`` and +``CERES_CLM40cn.nc``. You can use ``ncdump`` or ``ncview`` (from +NetCDF Tools) to examine the contents of these files. + +The ``Confrontation`` also handles the plotting and generation of HTML +output pages, but this is a more advanced aspect of the object, +detailed in its interface, shown `here +<_generated/ILAMB.Confrontation.Confrontation.html>`_. 
+ +Summary +------- + +While there is much more to learn in understanding the ILAMB python +package, these are the basic objects and concepts you will need to +grasp to implement new benchmarks and analysis. The basic idea is that +we have encapsulated the notion of benchmark datasets and their +accompanying analysis into a ``Confrontation`` class which operates on +the ``ModelResult`` represented as a ``Variable``. What we have done +here manually is part of what happens inside of the ``ilamb-run`` +script, which we executed in previous tutorials. + diff --git a/doc/packageapi.rst b/doc/packageapi.rst new file mode 100644 index 00000000..ac2996a1 --- /dev/null +++ b/doc/packageapi.rst @@ -0,0 +1,132 @@ +Package Contents +================ + +We have written this python package with the intent that it be easy to +use on a variety of levels. For the researcher who just wants to setup +basic model-benchmark confrontations using our mean-state analysis, +they may never need to program in python. For those who wish to +perform more complicated comparisons or pose their own analysis, we +present the package contents with some underlying philosophy +concerning the purpose of each object. + +Variable +-------- + +This class encapsulates data defined in space/time with common +analysis capabilities. You can think of it as a netCDF variable with +analysis routines that are aware of the spatial/temporal nature of the +data. It is the basic building block on which the analysis portion of +the package is built. + +.. currentmodule:: ILAMB.Variable +.. autosummary:: + :toctree: _generated + :nosignatures: + :template: class.rst + + Variable + +ModelResults +------------ + +This model result class aims to handle model queries efficiently and +uniformly without requiring the user to interface with the source +netCDF files themselves. + +.. currentmodule:: ILAMB.ModelResult +.. autosummary:: + :toctree: _generated + :nosignatures: + :template: class.rst + + ModelResult + +Confrontations +-------------- + +In the ILAMB parlance, a *confrontation* is a observational benchmark +dataset and its accompanying analysis. Thus we have implemented the +``Confrontation`` base class which will automatically handle the +querying of data from the model and perform our mean-state +analysis. In anticipation that users will want to write their own +analysis, or have benchmark datasets which compare to a non-algebraic +combination of model inputs, we support confrontation classes which +derive from the base class. The other classes listed here are examples +of how this may be accomplished. + +.. currentmodule:: ILAMB.Confrontation +.. autosummary:: + :toctree: _generated + :nosignatures: + :template: class.rst + + Confrontation + +.. currentmodule:: ILAMB.ConfNBP +.. autosummary:: + :toctree: _generated + :nosignatures: + :template: class.rst + + ConfNBP + +Regions +------- + +This class unifies treatment of regions in ILAMB by allowing regions +to be defined by latitude and longitude bounds as well by netCDF4 +files containing integer indices. + +.. currentmodule:: ILAMB.Regions +.. autosummary:: + :toctree: _generated + :nosignatures: + :template: class.rst + + Regions + +ilamblib +-------- + +This module collects many routines which may be helpful but do not +belong with any of the above objects. + +.. currentmodule:: ILAMB.ilamblib +.. 
autosummary:: + :toctree: _generated + + GenerateDistinctColors + ClipTime + ConvertCalendar + ComposeSpatialGrids + CellAreas + GlobalLatLonGrid + NearestNeighborInterpolation + TrueError + SympifyWithArgsUnits + FromNetCDF4 + CombineVariables + Score + ScoreSeasonalCycle + MakeComparable + AnalysisMeanState + AnalysisRelationship + +Post +---- + +Finally we provide a module with many ways of presenting these +results. While we want to keep data formats open such that researchers +may use plotting code of their own in their own language, we also want +to provide a robust set of tools for generating different views and +organizations of the confrontation results. + +.. currentmodule:: ILAMB.Post +.. autosummary:: + :toctree: _generated + + ColorBar + TaylorDiagram + WhittakerDiagram + RegisterCustomColormaps + BenchmarkSummaryFigure diff --git a/doc/tutorial.rst b/doc/tutorial.rst new file mode 100644 index 00000000..c3c4f5b7 --- /dev/null +++ b/doc/tutorial.rst @@ -0,0 +1,39 @@ +Tutorials +========= + +Beginner Level +-------------- + +The following tutorials are aimed at those who want to use the package +as it is. Perhaps you have an additional dataset to add or your own +model results to evaluate. This is the place to start learning what +you need to know! + +.. toctree:: + :maxdepth: 1 + + install + first_steps + add_model + add_data + format_data + ilamb_fetch + ilamb_run + ilamb_doctor + custom_regions + + +Developer Level +--------------- + +These tutorials start to explain the package functionality in more +depth. The level assumes familiarity with python as well as all the +beginner level tutorials. These tutorials are if you want to develop +your own benchmarks or metrics and contribute to the ILAMB python +package! + +.. toctree:: + :maxdepth: 1 + + overview + confront diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..b189267f --- /dev/null +++ b/setup.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python +from setuptools import setup +from codecs import open +import subprocess +import os + +VERSION = '2.3' + +def git_version(): + """ + Return the sha1 of local git HEAD as a string. 
+ """ + def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'PYTHONPATH']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + env=env + ).communicate()[0] + return out + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + git_revision = out.strip().decode('ascii') + except OSError: + git_revision = "unknown-git" + return git_revision + +def write_text(filename, text): + try: + with open(filename, 'w') as a: + a.write(text) + except Exception as e: + print(e) + +def write_version_py(filename=os.path.join('src/ILAMB', 'generated_version.py')): + cnt = """ +# THIS FILE IS GENERATED FROM ILAMB SETUP.PY +short_version = '%(version)s' +version = '%(version)s' +git_revision = '%(git_revision)s' +full_version = '%(version)s (%%(git_revision)s)' %% { + 'git_revision': git_revision} +release = %(isrelease)s +if not release: + version = full_version +""" + FULL_VERSION = VERSION + if os.path.isdir('.git'): + GIT_REVISION = git_version() + ISRELEASED = False + else: + GIT_REVISION = "RELEASE" + ISRELEASED = True + + FULL_VERSION += '.dev-' + GIT_REVISION + text = cnt % {'version': VERSION, + 'full_version': FULL_VERSION, + 'git_revision': GIT_REVISION, + 'isrelease': str(ISRELEASED)} + write_text(filename, text) + + +here = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: + long_description = f.read() + +write_version_py() +setup( + name='ILAMB', + version=VERSION, + description='The International Land Model Benchmarking Package', + long_description=long_description, + url='https://bitbucket.org/ncollier/ilamb', + author='Nathan Collier', + author_email='nathaniel.collier@gmail.com', + #license='MIT', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Science/Research', + 'Topic :: Scientific/Engineering', + #'License :: OSI Approved :: MIT License', + 'Operating System :: MacOS', + 'Operating System :: POSIX', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + ], + keywords=['benchmarking','earth system modeling','climate modeling','model intercomparison'], + packages=['ILAMB'], + package_dir={'ILAMB' : 'src/ILAMB'}, + scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor','bin/ilamb-table'], + install_requires=['numpy>=1.11.0', + 'matplotlib>=1.4.3', + #'basemap>=1.0.7', # basemap is in pypi but broken, need to manually install + 'netCDF4>=1.1.4', + 'cf_units>=2.0.0', + 'sympy>=0.7.6', + 'mpi4py>=1.3.1', + 'scipy>=0.9.0'] +) diff --git a/src/ILAMB/ConfDiurnal.py b/src/ILAMB/ConfDiurnal.py new file mode 100644 index 00000000..de040f2f --- /dev/null +++ b/src/ILAMB/ConfDiurnal.py @@ -0,0 +1,199 @@ +from ILAMB.Confrontation import Confrontation +from ILAMB.Confrontation import getVariableList +import matplotlib.pyplot as plt +import ILAMB.Post as post +from scipy.interpolate import CubicSpline +from mpl_toolkits.basemap import Basemap +from ILAMB.Variable import Variable +from netCDF4 import Dataset +import ILAMB.ilamblib as il +import numpy as np +import os,glob + +def DiurnalReshape(var): + dt = (var.time_bnds[:,1]-var.time_bnds[:,0]).mean() + spd = int(round(1./dt)) + begin = np.argmin(var.time[:(spd-1)]%spd) + end = begin+int(var.time[begin:].size/float(spd))*spd + shp = (-1,spd) 
+ var.data.shape[1:] + cycle = var.data[begin:end].reshape(shp) + tbnd = var.time_bnds[begin:end,:].reshape((-1,spd,2)) % 1 + tbnd = tbnd[0,...] + tbnd[-1,1] = 1. + t = tbnd.mean(axis=1) + return cycle,t,tbnd + +class ConfDiurnal(Confrontation): + """A confrontation for examining the diurnal + """ + def __init__(self,**keywords): + + # Calls the regular constructor + super(ConfDiurnal,self).__init__(**keywords) + + # Setup a html layout for generating web views of the results + pages = [] + + # Mean State page + pages.append(post.HtmlPage("MeanState","Mean State")) + pages[-1].setHeader("CNAME / RNAME / MNAME") + pages[-1].setSections(["Diurnal cycle"]) + pages.append(post.HtmlAllModelsPage("AllModels","All Models")) + pages[-1].setHeader("CNAME / RNAME") + pages[-1].setSections([]) + pages[-1].setRegions(self.regions) + pages.append(post.HtmlPage("DataInformation","Data Information")) + pages[-1].setSections([]) + pages[-1].text = "\n" + with Dataset(self.source) as dset: + for attr in dset.ncattrs(): + pages[-1].text += "
<p><b>&nbsp;&nbsp;%s:&nbsp;</b>%s</p>
    \n" % (attr,dset.getncattr(attr).encode('ascii','ignore')) + self.layout = post.HtmlLayout(pages,self.longname) + + def stageData(self,m): + + obs = Variable(filename = self.source, + variable_name = self.variable, + alternate_vars = self.alternate_vars) + if obs.time is None: raise il.NotTemporalVariable() + self.pruneRegions(obs) + + # Try to extract a commensurate quantity from the model + mod = m.extractTimeSeries(self.variable, + alt_vars = self.alternate_vars, + expression = self.derived, + initial_time = obs.time_bnds[ 0,0], + final_time = obs.time_bnds[-1,1], + lats = None if obs.spatial else obs.lat, + lons = None if obs.spatial else obs.lon).convert(obs.unit) + return obs,mod + + def confront(self,m): + + # get the HTML page + page = [page for page in self.layout.pages if "MeanState" in page.name][0] + + # Grab the data + obs,mod = self.stageData(m) + odata,ot,otb = DiurnalReshape(obs) + mdata,mt,mtb = DiurnalReshape(mod) + + n = len(self.lbls) + obs_amp = np.zeros(n) + mod_amp = np.zeros(n) + amp_score = np.zeros(n) + obs_phase = np.zeros(n) + mod_phase = np.zeros(n) + phase_score = np.zeros(n) + for site in range(n): + + # Site name + lbl = self.lbls[site] + skip = False + + # Observational diurnal cycle + tobs = ot + obs.lon[site]/360 + vobs = odata[...,site] + vobs = np.roll(vobs,-tobs.searchsorted(0),axis=1) + tobs = np.roll(tobs,-tobs.searchsorted(0)) + tobs += (tobs<0) + aobs = (vobs.max(axis=1)-vobs.min(axis=1)).mean() + vobs = vobs.mean(axis=0) + if vobs.size == vobs.mask.sum(): skip = True + if not skip: + acyc = CubicSpline(np.hstack([tobs,tobs[0]+1.]), + np.hstack([vobs,vobs[0] ]), + bc_type="periodic") + troot = acyc.derivative().solve() + troot = troot[(troot>=0)*(troot<=1.)] + otmx = troot[acyc(troot).argmax()] + + # Model diurnal cycle + tmod = mt + mod.lon[site]/360 + vmod = mdata[...,site] + vmod = np.roll(vmod,-tmod.searchsorted(0),axis=1) + tmod = np.roll(tmod,-tmod.searchsorted(0)) + tmod += (tmod<0) + amod = (vmod.max(axis=1)-vmod.min(axis=1)).mean() + vmod = vmod.mean(axis=0) + mcyc = CubicSpline(np.hstack([tmod,tmod[0]+1.]), + np.hstack([vmod,vmod[0] ]), + bc_type="periodic") + troot = mcyc.derivative().solve() + troot = troot[(troot>=0)*(troot<=1.)] + mtmx = troot[mcyc(troot).argmax()] + + # Scalars and scores + if skip: + obs_amp [site] = np.nan + obs_phase [site] = np.nan + amp_score [site] = np.nan + phase_score[site] = np.nan + else: + obs_amp [site] = aobs + obs_phase [site] = otmx + amp_score [site] = np.exp(-np.abs(amod-aobs)/aobs) + phase_score[site] = 1-np.abs(mtmx-otmx)/0.5 + mod_amp [site] = amod + mod_phase [site] = mtmx + + # Plot + ts = np.linspace(0,1,100) + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + if not skip: + ax.plot(tobs,vobs,'o',mew=0,markersize=3,color='k') + ax.plot(ts,acyc(ts),'-',color='k') + ax.plot(otmx,acyc(otmx),'o',mew=0,markersize=5,color='k') + ax.plot(tmod,vmod,'o',mew=0,markersize=3,color=m.color) + ax.plot(ts,mcyc(ts),'-',color=m.color) + ax.plot(mtmx,mcyc(mtmx),'o',mew=0,markersize=5,color=m.color) + xt = np.arange(25)[::3] + xtl = ["%02d:00" % xx for xx in xt] + ax.set_xticks (xt/24.) 
+ ax.set_xticklabels(xtl ) + ax.grid(True) + ax.set_xlabel("Mean solar time") + ax.set_ylabel("[%s]" % obs.unit) + plt.savefig(os.path.join(self.output_path,"%s_diurnal_%s.png" % (m.name,lbl))) + plt.close() + + obs_amp = np.ma.masked_invalid(obs_amp) + obs_phase = np.ma.masked_invalid(obs_phase) + amp_score = np.ma.masked_invalid(amp_score) + phase_score = np.ma.masked_invalid(phase_score) + + results = Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="w") + results.setncatts({"name" :m.name, "color":m.color}) + Variable(name="Amplitude global" ,unit=obs.unit,data= mod_amp .mean()).toNetCDF4(results,group="MeanState") + Variable(name="Max time global" ,unit="h" ,data=24*mod_phase.mean()).toNetCDF4(results,group="MeanState") + Variable(name="Amplitude Score global",unit="1" ,data= amp_score.mean()).toNetCDF4(results,group="MeanState") + Variable(name="Phase Score global" ,unit="1" ,data= phase_score.mean()).toNetCDF4(results,group="MeanState") + results.close() + if self.master: + results = Dataset(os.path.join(self.output_path,"%s_Benchmark.nc" % self.name),mode="w") + results.setncatts({"name" :"Benchmark", "color":np.asarray([0.5,0.5,0.5])}) + Variable(name="Amplitude global" ,unit=obs.unit,data= obs_amp .mean()).toNetCDF4(results,group="MeanState") + Variable(name="Max time global" ,unit="h" ,data=24*obs_phase.mean()).toNetCDF4(results,group="MeanState") + results.close() + + def modelPlots(self,m): + + bname = "%s/%s_Benchmark.nc" % (self.output_path,self.name) + fname = "%s/%s_%s.nc" % (self.output_path,self.name,m.name) + if not os.path.isfile(bname): return + if not os.path.isfile(fname): return + + # get the HTML page + page = [page for page in self.layout.pages if "MeanState" in page.name][0] + page.priority = ["Amplitude","Max","Min","Max time","Bias","RMSE","Shift","Score","Overall"] + + for site in range(len(self.lbls)): + + # Site name + lbl = self.lbls[site] + page.addFigure("Diurnal cycle", + lbl, + "MNAME_diurnal_%s.png" % lbl, + side = lbl, + legend = False) + diff --git a/src/ILAMB/ConfEvapFraction.py b/src/ILAMB/ConfEvapFraction.py new file mode 100644 index 00000000..46630614 --- /dev/null +++ b/src/ILAMB/ConfEvapFraction.py @@ -0,0 +1,65 @@ +from ILAMB.Confrontation import Confrontation +from mpl_toolkits.basemap import Basemap +from ILAMB.Variable import Variable +from netCDF4 import Dataset +import ILAMB.ilamblib as il +import numpy as np +import os + +class ConfEvapFraction(Confrontation): + + def stageData(self,m): + + energy_threshold = float(self.keywords.get("energy_threshold",20.)) + sh = Variable(filename = os.path.join(os.environ["ILAMB_ROOT"],"DATA/sh/GBAF/sh_0.5x0.5.nc"), + variable_name = "sh") + le = Variable(filename = os.path.join(os.environ["ILAMB_ROOT"],"DATA/le/GBAF/le_0.5x0.5.nc"), + variable_name = "le") + obs = Variable(name = self.variable, + unit = "1", + data = np.ma.masked_array(le.data/(le.data+sh.data), + mask=((le.data<0)+ + (sh.data<0)+ + ((le.data+sh.data) 0: + has_std = True + sds = dset.groups["scalars"].variables[key[0]] + corr[region].append(sds.getncattr("R" )) + std [region].append(sds.getncattr("std")) + + if has_std: + + # Legends + def _alphabeticalBenchmarkFirst(key): + key = key[0].upper() + if key == "BENCHMARK": return 0 + return key + tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) + fig,ax = plt.subplots() + for model,color in tmp: + ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) + handles,labels = ax.get_legend_handles_labels() + plt.close() + ncol = 
np.ceil(float(len(models))/11.).astype(int) + fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) + ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) + ax.axis('off') + fig.savefig("%s/legend_spatial_variance.png" % self.output_path) + plt.close() + + + page.addFigure("Period mean at surface", + "spatial_variance", + "RNAME_spatial_variance.png", + side = "SPATIAL TAYLOR DIAGRAM", + legend = False) + page.addFigure("Period mean at surface", + "legend_spatial_variance", + "legend_spatial_variance.png", + side = "MODEL COLORS", + legend = False) + if "Benchmark" in models: colors.pop(models.index("Benchmark")) + for region in self.regions: + if not (std.has_key(region) and corr.has_key(region)): continue + if len(std[region]) != len(corr[region]): continue + if len(std[region]) == 0: continue + fig = plt.figure(figsize=(6.0,6.0)) + post.TaylorDiagram(np.asarray(std[region]),np.asarray(corr[region]),1.0,fig,colors) + fig.savefig("%s/%s_spatial_variance.png" % (self.output_path,region)) + plt.close() + + + def modelPlots(self,m): + + def _fheight(region): + if region in ["arctic","southern"]: return 6.8 + return 2.8 + + bname = "%s/%s_Benchmark.nc" % (self.output_path,self.name) + fname = "%s/%s_%s.nc" % (self.output_path,self.name,m.name) + if not os.path.isfile(bname): return + if not os.path.isfile(fname): return + + # get the HTML page + page = [page for page in self.layout.pages if "MeanState" in page.name][0] + + with Dataset(fname) as dataset: + group = dataset.groups["MeanState"] + variables = getVariableList(group) + color = dataset.getncattr("color") + + vname = "timeint_surface_%s" % self.variable + if vname in variables: + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + page.addFigure("Period mean at surface", + "timeint", + "MNAME_RNAME_timeint.png", + side = "MODEL SURFACE MEAN", + legend = True) + for region in self.regions: + fig = plt.figure() + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + var.plot(ax, + region = region, + vmin = self.limits["timeint"]["min"], + vmax = self.limits["timeint"]["max"], + cmap = self.cmap, + land = 0.750, + water = 0.875) + fig.savefig("%s/%s_%s_timeint.png" % (self.output_path,m.name,region)) + plt.close() + + vname = "bias_surface_%s" % self.variable + if vname in variables: + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + page.addFigure("Period mean at surface", + "bias", + "MNAME_RNAME_bias.png", + side = "SURFACE MEAN BIAS", + legend = True) + for region in self.regions: + fig = plt.figure() + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + var.plot(ax, + region = region, + vmin = self.limits["bias"]["min"], + vmax = self.limits["bias"]["max"], + cmap = "seismic", + land = 0.750, + water = 0.875) + fig.savefig("%s/%s_%s_bias.png" % (self.output_path,m.name,region)) + plt.close() + + vname = "biasscore_surface_%s" % self.variable + if vname in variables: + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + page.addFigure("Period mean at surface", + "biasscore", + "MNAME_RNAME_biasscore.png", + side = "SURFACE MEAN BIAS SCORE", + legend = True) + for region in self.regions: + fig = plt.figure() + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + var.plot(ax, + region = region, + vmin = 0, + vmax = 1, + cmap = "RdYlGn", + land = 0.750, + water = 0.875) + fig.savefig("%s/%s_%s_biasscore.png" % (self.output_path,m.name,region)) + plt.close() + + vname = "rmse_surface_%s" % self.variable + if vname in variables: + var = 
Variable(filename=fname,variable_name=vname,groupname="MeanState") + page.addFigure("Period mean at surface", + "rmse", + "MNAME_RNAME_rmse.png", + side = "SURFACE MEAN RMSE", + legend = True) + for region in self.regions: + fig = plt.figure() + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + var.plot(ax, + region = region, + vmin = self.limits["rmse"]["min"], + vmax = self.limits["rmse"]["max"], + cmap = "YlOrRd", + land = 0.750, + water = 0.875) + fig.savefig("%s/%s_%s_rmse.png" % (self.output_path,m.name,region)) + plt.close() + + vname = "rmsescore_surface_%s" % self.variable + if vname in variables: + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + page.addFigure("Period mean at surface", + "rmsescore", + "MNAME_RNAME_rmsescore.png", + side = "SURFACE MEAN RMSE SCORE", + legend = True) + for region in self.regions: + fig = plt.figure() + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + var.plot(ax, + region = region, + vmin = 0, + vmax = 1, + cmap = "RdYlGn", + land = 0.750, + water = 0.875) + fig.savefig("%s/%s_%s_rmsescore.png" % (self.output_path,m.name,region)) + plt.close() + + for region in self.regions: + + vname = "timelonint_of_%s_over_%s" % (self.variable,region) + if vname in variables: + var = Variable(filename=fname,variable_name=vname,groupname="MeanState") + if region == "global": + page.addFigure("Mean regional depth profiles", + "timelonint", + "MNAME_RNAME_timelonint.png", + side = "MODEL DEPTH PROFILE", + legend = True, + longname = "Time/longitude averaged profile") + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + l = np.hstack([var.lat_bnds [:,0],var.lat_bnds [-1,1]]) + d = np.hstack([var.depth_bnds[:,0],var.depth_bnds[-1,1]]) + ind = np.all(var.data.mask,axis=0) + ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) + b = ind.min() + e = ind.max()+1 + ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e], + vmin = self.limits["timelonint"]["global"]["min"], + vmax = self.limits["timelonint"]["global"]["max"], + cmap = self.cmap) + ax.set_xlabel("latitude") + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + fig.savefig("%s/%s_%s_timelonint.png" % (self.output_path,m.name,region)) + plt.close() + + if not self.master: return + + with Dataset(bname) as dataset: + group = dataset.groups["MeanState"] + variables = getVariableList(group) + color = dataset.getncattr("color") + + vname = "timeint_surface_%s" % self.variable + if vname in variables: + var = Variable(filename=bname,variable_name=vname,groupname="MeanState") + page.addFigure("Period mean at surface", + "benchmark_timeint", + "Benchmark_RNAME_timeint.png", + side = "BENCHMARK SURFACE MEAN", + legend = True) + for region in self.regions: + fig = plt.figure() + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + var.plot(ax, + region = region, + vmin = self.limits["timeint"]["min"], + vmax = self.limits["timeint"]["max"], + cmap = self.cmap, + land = 0.750, + water = 0.875) + fig.savefig("%s/Benchmark_%s_timeint.png" % (self.output_path,region)) + plt.close() + + for region in self.regions: + + vname = "timelonint_of_%s_over_%s" % (self.variable,region) + if vname in variables: + var = Variable(filename=bname,variable_name=vname,groupname="MeanState") + if region == "global": + page.addFigure("Mean regional depth profiles", + "benchmark_timelonint", + "Benchmark_RNAME_timelonint.png", + side = "BENCHMARK DEPTH PROFILE", + legend = True, + longname = "Time/longitude averaged profile") + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + l = np.hstack([var.lat_bnds 
[:,0],var.lat_bnds [-1,1]]) + d = np.hstack([var.depth_bnds[:,0],var.depth_bnds[-1,1]]) + ind = np.all(var.data.mask,axis=0) + ind = np.ma.masked_array(range(ind.size),mask=ind,dtype=int) + b = ind.min() + e = ind.max()+1 + ax.pcolormesh(l[b:(e+1)],d,var.data[:,b:e], + vmin = self.limits["timelonint"]["global"]["min"], + vmax = self.limits["timelonint"]["global"]["max"], + cmap = self.cmap) + ax.set_xlabel("latitude") + ax.set_ylim((d.max(),d.min())) + ax.set_ylabel("depth [m]") + fig.savefig("%s/Benchmark_%s_timelonint.png" % (self.output_path,region)) + plt.close() + + def determinePlotLimits(self): + + # Pick limit type + max_str = "up99"; min_str = "dn99" + if self.keywords.get("limit_type","99per") == "minmax": + max_str = "max"; min_str = "min" + + # Determine the min/max of variables over all models + limits = {} + for fname in glob.glob("%s/*.nc" % self.output_path): + with Dataset(fname) as dataset: + if "MeanState" not in dataset.groups: continue + group = dataset.groups["MeanState"] + variables = [v for v in group.variables.keys() if (v not in group.dimensions.keys() and + "_bnds" not in v and + group.variables[v][...].size > 1)] + for vname in variables: + var = group.variables[vname] + pname = vname.split("_")[ 0] + if "_over_" in vname: + region = vname.split("_over_")[-1] + if not limits.has_key(pname): limits[pname] = {} + if not limits[pname].has_key(region): + limits[pname][region] = {} + limits[pname][region]["min"] = +1e20 + limits[pname][region]["max"] = -1e20 + limits[pname][region]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pname][region]["min"] = min(limits[pname][region]["min"],var.getncattr("min")) + limits[pname][region]["max"] = max(limits[pname][region]["max"],var.getncattr("max")) + else: + if not limits.has_key(pname): + limits[pname] = {} + limits[pname]["min"] = +1e20 + limits[pname]["max"] = -1e20 + limits[pname]["unit"] = post.UnitStringToMatplotlib(var.getncattr("units")) + limits[pname]["min"] = min(limits[pname]["min"],var.getncattr(min_str)) + limits[pname]["max"] = max(limits[pname]["max"],var.getncattr(max_str)) + + # Another pass to fix score limits + for pname in limits.keys(): + if "score" in pname: + if "min" in limits[pname].keys(): + limits[pname]["min"] = 0. + limits[pname]["max"] = 1. + else: + for region in limits[pname].keys(): + limits[pname][region]["min"] = 0. + limits[pname][region]["max"] = 1. + self.limits = limits + + # Second pass to plot legends + cmaps = {"bias":"seismic", + "rmse":"YlOrRd"} + for pname in limits.keys(): + + # Pick colormap + cmap = self.cmap + if cmaps.has_key(pname): + cmap = cmaps[pname] + elif "score" in pname: + cmap = "RdYlGn" + + # Need to symetrize? 
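# Bias fields are drawn with a diverging colormap ("seismic"), so the intent of
# the block below is to center the color limits on zero by taking the largest
# absolute limit. A minimal sketch of that intent (names are illustrative only):
#
#     vabs = max(abs(limits["bias"]["min"]), abs(limits["bias"]["max"]))
#     limits["bias"]["min"], limits["bias"]["max"] = -vabs, +vabs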
+ if pname in ["bias"]: + vabs = max(abs(limits[pname]["min"]),abs(limits[pname]["min"])) + limits[pname]["min"] = -vabs + limits[pname]["max"] = vabs + + # Some plots need legends + if pname in ["timeint","bias","biasscore","rmse","rmsescore","timelonint"]: + if limits[pname].has_key("min"): + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + post.ColorBar(ax, + vmin = limits[pname]["min" ], + vmax = limits[pname]["max" ], + label = limits[pname]["unit"], + cmap = cmap) + fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) + plt.close() + else: + fig,ax = plt.subplots(figsize=(6.8,1.0),tight_layout=True) + post.ColorBar(ax, + vmin = limits[pname]["global"]["min" ], + vmax = limits[pname]["global"]["max" ], + label = limits[pname]["global"]["unit"], + cmap = cmap) + fig.savefig("%s/legend_%s.png" % (self.output_path,pname)) + plt.close() + + + diff --git a/src/ILAMB/ConfNBP.py b/src/ILAMB/ConfNBP.py new file mode 100644 index 00000000..96e618e0 --- /dev/null +++ b/src/ILAMB/ConfNBP.py @@ -0,0 +1,198 @@ +from Confrontation import Confrontation +from Variable import Variable +from netCDF4 import Dataset +from copy import deepcopy +import ilamblib as il +import pylab as plt +import Post as post +import numpy as np +import os,glob + +class ConfNBP(Confrontation): + """A confrontation for examining the global net ecosystem carbon balance. + + """ + def __init__(self,**keywords): + + # Ugly, but this is how we call the Confrontation constructor + super(ConfNBP,self).__init__(**keywords) + + # Now we overwrite some things which are different here + self.regions = ['global'] + self.layout.regions = self.regions + + def stageData(self,m): + r"""Extracts model data and integrates it over the globe to match the confrontation dataset. + + Parameters + ---------- + m : ILAMB.ModelResult.ModelResult + the model result context + + Returns + ------- + obs : ILAMB.Variable.Variable + the variable context associated with the observational dataset + mod : ILAMB.Variable.Variable + the variable context associated with the model result + + """ + # get the observational data + obs = Variable(filename = self.source, + variable_name = self.variable, + alternate_vars = self.alternate_vars) + + # the model data needs integrated over the globe + mod = m.extractTimeSeries(self.variable, + alt_vars = self.alternate_vars) + mod = mod.integrateInSpace().convert(obs.unit) + tmin = mod.time_bnds[ 0,0] + tmax = mod.time_bnds[-1,1] + obs,mod = il.MakeComparable(obs,mod,clip_ref=True) + + # The obs can go beyond the information which models have + obs.trim(t=[tmin,tmax]) + mod.trim(t=[tmin,tmax]) + + # sign convention is backwards + obs.data *= -1. + mod.data *= -1. + + return obs,mod + + def confront(self,m): + r"""Confronts the input model with the observational data. + + Parameters + ---------- + m : ILAMB.ModelResult.ModelResult + the model results + + """ + # Grab the data + obs,mod = self.stageData(m) + obs_sum = obs.accumulateInTime().convert("Pg") + mod_sum = mod.accumulateInTime().convert("Pg") + + # End of period information + yf = np.round(obs.time_bnds[-1,1]/365.+1850.) + obs_end = Variable(name = "nbp(%4d)" % yf, + unit = obs_sum.unit, + data = obs_sum.data[-1]) + mod_end = Variable(name = "nbp(%4d)" % yf, + unit = mod_sum.unit, + data = mod_sum.data[-1]) + mod_diff = Variable(name = "diff(%4d)" % yf, + unit = mod_sum.unit, + data = mod_sum.data[-1]-obs_sum.data[-1]) + + # Difference score normlized by the uncertainty in the + # accumulation at the end of the time period. + normalizer = 0. 
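# The difference score computed just below maps the end-of-period accumulation
# mismatch onto (0,1] with an exponential: score = exp(-0.287*|mod - obs|/normalizer),
# where the normalizer is the dataset-specific uncertainty set in the next lines.
# A purely illustrative worked example: a mismatch equal to the normalizer gives
# exp(-0.287) ~= 0.75, and a perfect match gives a score of 1.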
+ if "GCP" in self.longname: normalizer = 21.6*0.5 + if "Hoffman" in self.longname: normalizer = 84.6*0.5 + dscore = Variable(name = "Difference Score global" % yf, + unit = "1", + data = np.exp(-0.287*np.abs(mod_diff.data/normalizer))) + + # Temporal distribution + skip_taylor = self.keywords.get("skip_taylor",False) + if not skip_taylor: + np.seterr(over='ignore',under='ignore') + std0 = obs.data.std() + std = mod.data.std() + np.seterr(over='raise' ,under='raise' ) + R0 = 1.0 + R = obs.correlation(mod,ctype="temporal") + std /= std0 + score = Variable(name = "Temporal Distribution Score global", + unit = "1", + data = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0))) + + # Change names to make things easier to parse later + obs .name = "spaceint_of_nbp_over_global" + mod .name = "spaceint_of_nbp_over_global" + obs_sum .name = "accumulate_of_nbp_over_global" + mod_sum .name = "accumulate_of_nbp_over_global" + + # Dump to files + results = Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="w") + results.setncatts({"name" :m.name, "color":m.color}) + mod .toNetCDF4(results,group="MeanState") + mod_sum .toNetCDF4(results,group="MeanState") + mod_end .toNetCDF4(results,group="MeanState") + mod_diff .toNetCDF4(results,group="MeanState") + dscore .toNetCDF4(results,group="MeanState") + if not skip_taylor: + score .toNetCDF4(results,group="MeanState",attributes={"std":std,"R":R.data}) + results.close() + + if self.master: + results = Dataset(os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name)),mode="w") + results.setncatts({"name" :"Benchmark", "color":np.asarray([0.5,0.5,0.5])}) + obs .toNetCDF4(results,group="MeanState") + obs_sum .toNetCDF4(results,group="MeanState") + obs_end .toNetCDF4(results,group="MeanState") + results.close() + + + def compositePlots(self): + + # we want to run the original and also this additional plot + super(ConfNBP,self).compositePlots() + + # get the HTML page + page = [page for page in self.layout.pages if "MeanState" in page.name][0] + + colors = {} + corr = {} + std = {} + accum = {} + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + dataset = Dataset(fname) + if "MeanState" not in dataset.groups: continue + dset = dataset.groups["MeanState"] + mname = dataset.getncattr("name") + colors[mname] = dataset.getncattr("color") + key = [v for v in dset.groups["scalars"].variables.keys() if ("Temporal Distribution Score" in v)] + if len(key) > 0: + sds = dset.groups["scalars"].variables[key[0]] + corr[mname] = sds.R + std [mname] = sds.std + if "accumulate_of_nbp_over_global" in dset.variables.keys(): + accum[mname] = Variable(filename = fname, + variable_name = "accumulate_of_nbp_over_global", + groupname = "MeanState") + + # temporal distribution Taylor plot + if len(corr) > 0: + page.addFigure("Spatially integrated regional mean", + "temporal_variance", + "temporal_variance.png", + side = "TEMPORAL TAYLOR DIAGRAM", + legend = False) + fig = plt.figure(figsize=(6.0,6.0)) + keys = corr.keys() + post.TaylorDiagram(np.asarray([std [key] for key in keys]), + np.asarray([corr[key] for key in keys]), + 1.0,fig, + [colors[key] for key in keys]) + fig.savefig(os.path.join(self.output_path,"temporal_variance.png")) + plt.close() + + + # composite annual cycle plot + if len(accum) > 1: + page.addFigure("Spatially integrated regional mean", + "compaccumulation", + "RNAME_compaccumulation.png", + side = "ACCUMULATION", + legend = False) + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + dy = 
0.05*(self.limits["accumulate"]["global"]["max"]-self.limits["accumulate"]["global"]["min"]) + for key in accum: + accum[key].plot(ax,lw=2,color=colors[key],label=key, + vmin=self.limits["accumulate"]["global"]["min"]-dy, + vmax=self.limits["accumulate"]["global"]["max"]+dy) + fig.savefig(os.path.join(self.output_path,"global_compaccumulation.png" )) + plt.close() diff --git a/src/ILAMB/ConfPermafrost.py b/src/ILAMB/ConfPermafrost.py new file mode 100644 index 00000000..92c2ead4 --- /dev/null +++ b/src/ILAMB/ConfPermafrost.py @@ -0,0 +1,223 @@ +from Confrontation import Confrontation +from mpl_toolkits.basemap import Basemap +from Variable import Variable +from Post import ColorBar +import matplotlib.pyplot as plt +from netCDF4 import Dataset +import ilamblib as il +import numpy as np + +class ConfPermafrost(Confrontation): + + def __init__(self,**keywords): + + # Ugly, but this is how we call the Confrontation constructor + super(ConfPermafrost,self).__init__(**keywords) + + # Now we overwrite some things which are different here + self.layout + self.regions = ["global"] + self.layout.regions = self.regions + self.weight = { "Obs Score" : 1., + "Mod Score" : 1. } + for page in self.layout.pages: + page.setMetricPriority(["Total Area" , + "Overlap Area", + "Missed Area" , + "Excess Area" , + "Obs Score" , + "Mod Score" , + "Overall Score"]) + + def stageData(self,m): + + obs = Variable(filename = self.source, + variable_name = "permafrost_extent") + + # These parameters may be changed from the configure file + y0 = float(self.keywords.get("y0" ,1970.)) # [yr] beginning year to include in analysis + yf = float(self.keywords.get("yf" ,2000.)) # [yr] end year to include in analysis + dmax = float(self.keywords.get("dmax",3.5)) # [m] consider layers where depth in is the range [0,dmax] + Teps = float(self.keywords.get("Teps",273.15)) # [K] temperature below which we assume permafrost occurs + + t0 = (y0 -1850.)*365. + tf = (yf+1-1850.)*365. + mod = m.extractTimeSeries(self.variable, + initial_time = t0, + final_time = tf) + mod.trim(t = [t0 ,tf ], + lat = [obs.lat.min(),90 ], + d = [0 ,dmax]) + mod = mod.annualCycle() + Tmax = mod.data.max(axis=0) + table = np.zeros(Tmax.shape[-2:]) + table[...] = np.NAN + thaw = np.zeros(table.shape,dtype=bool) + for i in range(mod.depth_bnds.shape[0]-1,-1,-1): + thaw += (Tmax[i]>=Teps) + frozen = np.where((Tmax[i]0: + has_cycle = True + cycle[region].append(Variable(filename=fname,groupname="MeanState",variable_name=key[0])) + + if not std. has_key(region): std [region] = [] + if not corr. 
has_key(region): corr [region] = [] + + key = [] + if "scalars" in dset.groups: + key = [v for v in dset.groups["scalars"].variables.keys() if ("Spatial Distribution Score" in v and region in v)] + if len(key) > 0: + has_std = True + sds = dset.groups["scalars"].variables[key[0]] + corr[region].append(sds.getncattr("R" )) + std [region].append(sds.getncattr("std")) + + # composite annual cycle plot + if has_cycle and len(models) > 2: + page.addFigure("Spatially integrated regional mean", + "compcycle", + "RNAME_compcycle.png", + side = "ANNUAL CYCLE", + legend = False) + + for region in self.regions: + if not cycle.has_key(region): continue + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + for name,color,var in zip(models,colors,cycle[region]): + dy = 0.05*(self.limits["cycle"][region]["max"]-self.limits["cycle"][region]["min"]) + var.plot(ax,lw=2,color=color,label=name, + ticks = time_opts["cycle"]["ticks"], + ticklabels = time_opts["cycle"]["ticklabels"], + vmin = self.limits["cycle"][region]["min"]-dy, + vmax = self.limits["cycle"][region]["max"]+dy) + ylbl = time_opts["cycle"]["ylabel"] + if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + fig.savefig(os.path.join(self.output_path,"%s_compcycle.png" % (region))) + plt.close() + + # plot legends with model colors (sorted with Benchmark data on top) + page.addFigure("Spatially integrated regional mean", + "legend_compcycle", + "legend_compcycle.png", + side = "MODEL COLORS", + legend = False) + def _alphabeticalBenchmarkFirst(key): + key = key[0].upper() + if key == "BENCHMARK": return 0 + return key + tmp = sorted(zip(models,colors),key=_alphabeticalBenchmarkFirst) + fig,ax = plt.subplots() + for model,color in tmp: + ax.plot(0,0,'o',mew=0,ms=8,color=color,label=model) + handles,labels = ax.get_legend_handles_labels() + plt.close() + + ncol = np.ceil(float(len(models))/11.).astype(int) + if ncol > 0: + fig,ax = plt.subplots(figsize=(3.*ncol,2.8),tight_layout=True) + ax.legend(handles,labels,loc="upper right",ncol=ncol,fontsize=10,numpoints=1) + ax.axis('off') + fig.savefig(os.path.join(self.output_path,"legend_compcycle.png")) + fig.savefig(os.path.join(self.output_path,"legend_spatial_variance.png")) + fig.savefig(os.path.join(self.output_path,"legend_temporal_variance.png")) + plt.close() + + # spatial distribution Taylor plot + if has_std: + page.addFigure("Temporally integrated period mean", + "spatial_variance", + "RNAME_spatial_variance.png", + side = "SPATIAL TAYLOR DIAGRAM", + legend = False) + page.addFigure("Temporally integrated period mean", + "legend_spatial_variance", + "legend_spatial_variance.png", + side = "MODEL COLORS", + legend = False) + if "Benchmark" in models: colors.pop(models.index("Benchmark")) + for region in self.regions: + if not (std.has_key(region) and corr.has_key(region)): continue + if len(std[region]) != len(corr[region]): continue + if len(std[region]) == 0: continue + fig = plt.figure(figsize=(6.0,6.0)) + post.TaylorDiagram(np.asarray(std[region]),np.asarray(corr[region]),1.0,fig,colors) + fig.savefig(os.path.join(self.output_path,"%s_spatial_variance.png" % region)) + plt.close() + + def modelPlots(self,m): + """For a given model, create the plots of the analysis results. + + This routine will extract plotting information out of the + netCDF file which results from the analysis and create + plots. Note that determinePlotLimits should be called before + this routine. 
+ + """ + self._relationship(m) + bname = os.path.join(self.output_path,"%s_Benchmark.nc" % (self.name )) + fname = os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)) + if not os.path.isfile(bname): return + if not os.path.isfile(fname): return + + # get the HTML page + page = [page for page in self.layout.pages if "MeanState" in page.name][0] + + with Dataset(fname) as dataset: + group = dataset.groups["MeanState"] + variables = getVariableList(group) + color = dataset.getncattr("color") + for vname in variables: + + # is this a variable we need to plot? + pname = vname.split("_")[0] + if group.variables[vname][...].size <= 1: continue + var = Variable(filename=fname,groupname="MeanState",variable_name=vname) + + if (var.spatial or (var.ndata is not None)) and not var.temporal: + + # grab plotting options + if pname not in self.limits.keys(): continue + opts = space_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + + # plot variable + for region in self.regions: + fig = plt.figure(figsize=(6.8,2.8)) + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + var.plot(ax, + region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) + plt.close() + + # Jumping through hoops to get the benchmark plotted and in the html output + if self.master and (pname == "timeint" or pname == "phase" or pname == "iav"): + + opts = space_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + "benchmark_%s" % pname, + opts["pattern"].replace("MNAME","Benchmark"), + side = opts["sidelbl"].replace("MODEL","BENCHMARK"), + legend = True) + + # plot variable + obs = Variable(filename=bname,groupname="MeanState",variable_name=vname) + for region in self.regions: + fig = plt.figure(figsize=(6.8,2.8)) + ax = fig.add_axes([0.06,0.025,0.88,0.965]) + obs.plot(ax, + region = region, + vmin = self.limits[pname]["min"], + vmax = self.limits[pname]["max"], + cmap = self.limits[pname]["cmap"]) + fig.savefig(os.path.join(self.output_path,"Benchmark_%s_%s.png" % (region,pname))) + plt.close() + + if not (var.spatial or (var.ndata is not None)) and var.temporal: + + # grab the benchmark dataset to plot along with + obs = Variable(filename=bname,groupname="MeanState",variable_name=vname).convert(var.unit) + + # grab plotting options + opts = time_opts[pname] + + # add to html layout + page.addFigure(opts["section"], + pname, + opts["pattern"], + side = opts["sidelbl"], + legend = opts["haslegend"]) + + # plot variable + for region in self.regions: + if region not in vname: continue + fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + obs.plot(ax,lw=2,color='k',alpha=0.5) + var.plot(ax,lw=2,color=color,label=m.name, + ticks =opts["ticks"], + ticklabels=opts["ticklabels"]) + + dy = 0.05*(self.limits[pname][region]["max"]-self.limits[pname][region]["min"]) + ax.set_ylim(self.limits[pname][region]["min"]-dy, + self.limits[pname][region]["max"]+dy) + ylbl = opts["ylabel"] + if ylbl == "unit": ylbl = post.UnitStringToMatplotlib(var.unit) + ax.set_ylabel(ylbl) + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,region,pname))) + plt.close() + + logger.info("[%s][%s] Success" % (self.longname,m.name)) + + def sitePlots(self,m): + """ + + """ + if not self.hasSites: return + + obs,mod = self.stageData(m) + for i in range(obs.ndata): + 
fig,ax = plt.subplots(figsize=(6.8,2.8),tight_layout=True) + tmask = np.where(mod.data.mask[:,i]==False)[0] + if tmask.size > 0: + tmin,tmax = tmask[[0,-1]] + else: + tmin = 0; tmax = mod.time.size-1 + + t = mod.time[tmin:(tmax+1) ] + x = mod.data[tmin:(tmax+1),i] + y = obs.data[tmin:(tmax+1),i] + ax.plot(t,y,'-k',lw=2,alpha=0.5) + ax.plot(t,x,'-',color=m.color) + + ind = np.where(t % 365 < 30.)[0] + ticks = t[ind] - (t[ind] % 365) + ticklabels = (ticks/365.+1850.).astype(int) + ax.set_xticks (ticks ) + ax.set_xticklabels(ticklabels) + ax.set_ylabel(post.UnitStringToMatplotlib(mod.unit)) + fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (m.name,self.lbls[i],"time"))) + plt.close() + + + def generateHtml(self): + """Generate the HTML for the results of this confrontation. + + This routine opens all netCDF files and builds a table of + metrics. Then it passes the results to the HTML generator and + saves the result in the output directory. This only occurs on + the confrontation flagged as master. + + """ + # only the master processor needs to do this + if not self.master: return + + for page in self.layout.pages: + + # build the metric dictionary + metrics = {} + page.models = [] + for fname in glob.glob(os.path.join(self.output_path,"*.nc")): + with Dataset(fname) as dataset: + mname = dataset.getncattr("name") + if mname != "Benchmark": page.models.append(mname) + if not dataset.groups.has_key(page.name): continue + group = dataset.groups[page.name] + + # if the dataset opens, we need to add the model (table row) + metrics[mname] = {} + + # each model will need to have all regions + for region in self.regions: metrics[mname][region] = {} + + # columns in the table will be in the scalars group + if not group.groups.has_key("scalars"): continue + + # we add scalars to the model/region based on the region + # name being in the variable name. If no region is found, + # we assume it is the global region. 
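# A hypothetical example of the name parsing below (the scalar name is made up
# for illustration): a scalar called "Bias Score global" contains the region
# string "global", so it is stored under metrics[mname]["global"] with the
# region substring stripped from its name; a scalar whose name contains no
# known region falls through to the "global" bucket unchanged.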
+ grp = group.groups["scalars"] + for vname in grp.variables.keys(): + found = False + for region in self.regions: + if region in vname: + found = True + var = grp.variables[vname] + name = vname.replace(region,"") + metrics[mname][region][name] = Variable(name = name, + unit = var.units, + data = var[...]) + if not found: + var = grp.variables[vname] + metrics[mname]["global"][vname] = Variable(name = vname, + unit = var.units, + data = var[...]) + page.setMetrics(metrics) + + # write the HTML page + f = file(os.path.join(self.output_path,"%s.html" % (self.name)),"w") + f.write(str(self.layout)) + f.close() + + def _relationship(self,m,nbin=25): + """ + + """ + + def _retrieveData(filename): + key = None + with Dataset(filename,mode="r") as dset: + key = [v for v in dset.groups["MeanState"].variables.keys() if "timeint_" in v] + return Variable(filename = filename, + groupname = "MeanState", + variable_name = key[0]) + + def _checkLim(data,lim): + if lim is None: + lim = [min(data.min(),data.min()), + max(data.max(),data.max())] + delta = 1e-8*(lim[1]-lim[0]) + lim[0] -= delta + lim[1] += delta + else: + assert type(lim) == type([]) + assert len (lim) == 2 + return lim + + def _limitExtents(vars): + lim = [+1e20,-1e20] + for v in vars: + lmin,lmax = _checkLim(v.data,None) + lim[0] = min(lmin,lim[0]) + lim[1] = max(lmax,lim[1]) + return lim + + def _buildDistributionResponse(ind,dep,ind_lim=None,dep_lim=None,region=None,nbin=25,eps=3e-3): + + r = Regions() + + # Checks on the input parameters + assert np.allclose(ind.data.shape,dep.data.shape) + ind_lim = _checkLim(ind.data,ind_lim) + dep_lim = _checkLim(dep.data,dep_lim) + + # Mask data + mask = ind.data.mask + dep.data.mask + if region is not None: mask += r.getMask(region,ind) + x = ind.data[mask==False].flatten() + y = dep.data[mask==False].flatten() + + # Compute normalized 2D distribution + dist,xedges,yedges = np.histogram2d(x,y, + bins = [nbin,nbin], + range = [ind_lim,dep_lim]) + dist = np.ma.masked_values(dist.T,0).astype(float) + dist /= dist.sum() + + # Compute the functional response + which_bin = np.digitize(x,xedges).clip(1,xedges.size-1)-1 + mean = np.ma.zeros(xedges.size-1) + std = np.ma.zeros(xedges.size-1) + cnt = np.ma.zeros(xedges.size-1) + np.seterr(under='ignore') + for i in range(mean.size): + yi = y[which_bin==i] + cnt [i] = yi.size + mean[i] = yi.mean() + std [i] = yi.std() + mean = np.ma.masked_array(mean,mask = (cnt/cnt.sum()) < eps) + std = np.ma.masked_array( std,mask = (cnt/cnt.sum()) < eps) + np.seterr(under='warn') + return dist,xedges,yedges,mean,std + + def _scoreDistribution(ref,com): + mask = ref.mask + com.mask + ref = np.ma.masked_array(ref.data,mask=mask).compressed() + com = np.ma.masked_array(com.data,mask=mask).compressed() + return np.sqrt(((np.sqrt(ref)-np.sqrt(com))**2).sum())/np.sqrt(2) + + def _scoreFunction(ref,com): + mask = ref.mask + com.mask + ref = np.ma.masked_array(ref.data,mask=mask).compressed() + com = np.ma.masked_array(com.data,mask=mask).compressed() + return np.exp(-np.linalg.norm(ref-com)/np.linalg.norm(ref)) + + def _plotDistribution(dist,xedges,yedges,xlabel,ylabel,filename): + fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) + pc = ax.pcolormesh(xedges, yedges, dist, + norm = LogNorm(), + cmap = 'plasma' if plt.cm.cmap_d.has_key('plasma') else 'summer', + vmin = 1e-4, vmax = 1e-1) + div = make_axes_locatable(ax) + fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), + orientation="vertical",label="Fraction of total datasites") + 
ax.set_xlabel(xlabel,fontsize = 12) + ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + fig.savefig(filename) + plt.close() + + def _plotDifference(ref,com,xedges,yedges,xlabel,ylabel,filename): + ref = np.ma.copy(ref) + com = np.ma.copy(com) + ref.data[np.where(ref.mask)] = 0. + com.data[np.where(com.mask)] = 0. + diff = np.ma.masked_array(com.data-ref.data,mask=ref.mask*com.mask) + lim = np.abs(diff).max() + fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) + pc = ax.pcolormesh(xedges, yedges, diff, + cmap = 'Spectral_r', + vmin = -lim, vmax = +lim) + div = make_axes_locatable(ax) + fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), + orientation="vertical",label="Distribution Difference") + ax.set_xlabel(xlabel,fontsize = 12) + ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + fig.savefig(filename) + plt.close() + + def _plotFunction(ref_mean,ref_std,com_mean,com_std,xedges,yedges,xlabel,ylabel,color,filename): + + xe = 0.5*(xedges[:-1]+xedges[1:]) + delta = 0.1*np.diff(xedges).mean() + + # reference function + ref_x = xe - delta + ref_y = ref_mean + ref_e = ref_std + if not (ref_mean.mask==False).all(): + ind = np.where(ref_mean.mask==False) + ref_x = xe [ind]-delta + ref_y = ref_mean[ind] + ref_e = ref_std [ind] + + # comparison function + com_x = xe + delta + com_y = com_mean + com_e = com_std + if not (com_mean.mask==False).all(): + ind = np.where(com_mean.mask==False) + com_x = xe [ind]-delta + com_y = com_mean[ind] + com_e = com_std [ind] + + fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) + ax.errorbar(ref_x,ref_y,yerr=ref_e,fmt='-o',color='k') + ax.errorbar(com_x,com_y,yerr=com_e,fmt='-o',color=color) + ax.set_xlabel(xlabel,fontsize = 12) + ax.set_ylabel(ylabel,fontsize = 12 if len(ylabel) <= 60 else 10) + ax.set_xlim(xedges[0],xedges[-1]) + ax.set_ylim(yedges[0],yedges[-1]) + fig.savefig(filename) + plt.close() + + # If there are no relationships to analyze, get out of here + if self.relationships is None: return + + # Get the HTML page + page = [page for page in self.layout.pages if "Relationships" in page.name] + if len(page) == 0: return + page = page[0] + + # Try to get the dependent data from the model and obs + try: + ref_dep = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,"Benchmark"))) + com_dep = _retrieveData(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name ))) + dep_name = self.longname.split("/")[0] + dep_min = self.limits["timeint"]["min"] + dep_max = self.limits["timeint"]["max"] + except: + return + + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: + + # Grab/create a relationship and scalars group + group = None + if "Relationships" not in results.groups: + group = results.createGroup("Relationships") + else: + group = results.groups["Relationships"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each relationship... 
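# Each relationship handled in the loop below is scored in two ways using the
# helpers defined above:
#   * _scoreDistribution: a Hellinger-type distance between the normalized 2-D
#     histograms, sqrt(sum((sqrt(ref)-sqrt(com))**2))/sqrt(2), written out as
#     "<longname> Hellinger Distance <region>";
#   * _scoreFunction: exp(-||ref-com||/||ref||) applied to the binned mean
#     response, written out as "<longname> RMSE Score <region>".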
+ for c in self.relationships: + + # try to get the independent data from the model and obs + try: + ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + ind_name = c.longname.split("/")[0] + ind_min = c.limits["timeint"]["min"]-1e-12 + ind_max = c.limits["timeint"]["max"]+1e-12 + except: + continue + + # Add figures to the html page + page.addFigure(c.longname, + "benchmark_rel_%s" % ind_name, + "Benchmark_RNAME_rel_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_%s" % ind_name, + "MNAME_RNAME_rel_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_diff_%s" % ind_name, + "MNAME_RNAME_rel_diff_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_func_%s" % ind_name, + "MNAME_RNAME_rel_func_%s.png" % ind_name, + legend = False, + benchmark = False) + + # Analysis over regions + lim_dep = [dep_min,dep_max] + lim_ind = [ind_min,ind_max] + longname = c.longname.split('/')[0] + for region in self.regions: + ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + + # Make the plots + _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), + "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) + _plotDistribution(com_dist[0],com_dist[1],com_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) + _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) + _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], + "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), + m.color, + os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) + + # Score the distribution + score = _scoreDistribution(ref_dist[0],com_dist[0]) + sname = "%s Hellinger Distance %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + # Score the functional response + score = _scoreFunction(ref_dist[3],com_dist[3]) + sname = "%s RMSE Score %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + + + + +class FileContextManager(): + + def __init__(self,master,mod_results,obs_results): + + self.master = master + self.mod_results = mod_results + self.obs_results = obs_results + self.mod_dset = None + self.obs_dset = None + + def __enter__(self): + + # Open the file on entering, both if you are the master + 
self.mod_dset = Dataset(self.mod_results,mode="w") + if self.master: self.obs_dset = Dataset(self.obs_results,mode="w") + return self + + def __exit__(self, exc_type, exc_value, traceback): + + # Always close the file(s) on exit + self.mod_dset.close() + if self.master: self.obs_dset.close() + + # If an exception occurred, also remove the files + if exc_type is not None: + os.system("rm -f %s" % self.mod_results) + + diff --git a/src/ILAMB/ModelResult.py b/src/ILAMB/ModelResult.py new file mode 100644 index 00000000..6980d8b8 --- /dev/null +++ b/src/ILAMB/ModelResult.py @@ -0,0 +1,346 @@ +from Variable import Variable +from netCDF4 import Dataset +import ilamblib as il +import numpy as np +import glob,os +from mpi4py import MPI +import logging + +logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) + +class ModelResult(): + """A class for exploring model results. + + This class provides a simplified way of accessing model + results. It is essentially a pointer to a top level directory and + defines the model as all netCDF4 files found in its + subdirectories. If this directory contains model output from + several runs or experiments, you may wish to specify a string (the + *filter* argument) which we will require to be in the filename for + it to be considered part of the model. + + Parameters + ---------- + path : str + the full path to the directory which contains the model result + files + modelname : str, optional + a string representing the name of the model, will be used as a + label in plot legends + color : 3-tuple, optional + a normalized tuple representing a color in RGB color space, + will be used to color line plots + filter : str, optional + this string must be in file's name for it to be considered as + part of the model results + model_year : 2-tuple of int, optional + used to shift model times, all model years at model_year[0] + are shifted to model_year[1] + """ + def __init__(self,path,modelname="unamed",color=(0,0,0),filter="",model_year=None): + self.path = path + self.color = color + self.filter = filter + self.shift = 0. + if model_year is not None: self.shift = (model_year[1]-model_year[0])*365. + self.name = modelname + self.confrontations = {} + self.cell_areas = None + self.land_fraction = None + self.land_areas = None + self.land_area = None + self.lat = None + self.lon = None + self.lat_bnds = None + self.lon_bnds = None + self.variables = None + self.extents = np.asarray([[-90.,+90.],[-180.,+180.]]) + self._findVariables() + self._getGridInformation() + + def _findVariables(self): + """Loops through the netCDF4 files in a model's path and builds a dictionary of which variables are in which files. + """ + def _get(key,dset): + dim_name = key + try: + v = dset.variables[key] + dim_bnd_name = v.getncattr("bounds") + except: + dim_bnd_name = None + return dim_name,dim_bnd_name + + variables = {} + for subdir, dirs, files in os.walk(self.path): + for fileName in files: + if ".nc" not in fileName: continue + if self.filter not in fileName: continue + pathName = os.path.join(subdir,fileName) + dataset = Dataset(pathName) + + # populate dictionary for which variables are in which files + for key in dataset.variables.keys(): + if not variables.has_key(key): + variables[key] = [] + variables[key].append(pathName) + + # determine spatial extents + lats = [key for key in variables.keys() if (key.lower().startswith("lat" ) or + key.lower(). endswith("lat" ))] + lons = [key for key in variables.keys() if (key.lower().startswith("lon" ) or + key.lower(). 
endswith("lon" ) or + key.lower().startswith("long") or + key.lower(). endswith("long"))] + for key in lats: + for pathName in variables[key]: + with Dataset(pathName) as dset: + lat = dset.variables[key][...] + if lat.size == 1: continue + self.extents[0,0] = max(self.extents[0,0],lat.min()) + self.extents[0,1] = min(self.extents[0,1],lat.max()) + for key in lons: + for pathName in variables[key]: + with Dataset(pathName) as dset: + lon = dset.variables[key][...] + if lon.size == 1: continue + if lon.ndim < 1 or lon.ndim > 2: continue + lon = (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360 + self.extents[1,0] = max(self.extents[1,0],lon.min()) + self.extents[1,1] = min(self.extents[1,1],lon.max()) + + # fix extents + eps = 5. + if self.extents[0,0] < (- 90.+eps): self.extents[0,0] = - 90. + if self.extents[0,1] > (+ 90.-eps): self.extents[0,1] = + 90. + if self.extents[1,0] < (-180.+eps): self.extents[1,0] = -180. + if self.extents[1,1] > (+180.-eps): self.extents[1,1] = +180. + self.variables = variables + + def _getGridInformation(self): + """Looks in the model output for cell areas as well as land fractions. + """ + # Are there cell areas associated with this model? + if "areacella" not in self.variables.keys(): return + f = Dataset(self.variables["areacella"][0]) + self.cell_areas = f.variables["areacella"][...] + self.lat = f.variables["lat"][...] + self.lon = f.variables["lon"][...] + self.lat_bnds = np.zeros(self.lat.size+1) + self.lat_bnds[:-1] = f.variables["lat_bnds"][:,0] + self.lat_bnds[-1] = f.variables["lat_bnds"][-1,1] + self.lon_bnds = np.zeros(self.lon.size+1) + self.lon_bnds[:-1] = f.variables["lon_bnds"][:,0] + self.lon_bnds[-1] = f.variables["lon_bnds"][-1,1] + + # Now we do the same for land fractions + if "sftlf" not in self.variables.keys(): + self.land_areas = self.cell_areas + else: + self.land_fraction = (Dataset(self.variables["sftlf"][0]).variables["sftlf"])[...] + # some models represent the fraction as a percent + if np.ma.max(self.land_fraction) > 1: self.land_fraction *= 0.01 + np.seterr(over='ignore') + self.land_areas = self.cell_areas*self.land_fraction + np.seterr(over='warn') + self.land_area = np.ma.sum(self.land_areas) + return + + def extractTimeSeries(self,variable,lats=None,lons=None,alt_vars=[],initial_time=-1e20,final_time=1e20,output_unit="",expression=None): + """Extracts a time series of the given variable from the model. + + Parameters + ---------- + variable : str + name of the variable to extract + alt_vars : list of str, optional + alternate variables to search for if *variable* is not found + initial_time : float, optional + include model results occurring after this time + final_time : float, optional + include model results occurring before this time + output_unit : str, optional + if specified, will try to convert the units of the variable + extract to these units given. 
+ lats : numpy.ndarray, optional + a 1D array of latitude locations at which to extract information + lons : numpy.ndarray, optional + a 1D array of longitude locations at which to extract information + expression : str, optional + an algebraic expression describing how to combine model outputs + + Returns + ------- + var : ILAMB.Variable.Variable + the extracted variable + + """ + # prepend the target variable to the list of possible variables + altvars = list(alt_vars) + altvars.insert(0,variable) + + # checks on input consistency + if lats is not None: assert lons is not None + if lons is not None: assert lats is not None + if lats is not None: assert lats.shape == lons.shape + + # create a list of datafiles which have a non-null intersection + # over the desired time range + V = [] + tmin = 1e20 + tmax = -1e20 + for v in altvars: + if not self.variables.has_key(v): continue + for pathName in self.variables[v]: + var = Variable(filename = pathName, + variable_name = variable, + alternate_vars = altvars[1:], + area = self.land_areas, + t0 = initial_time - self.shift, + tf = final_time - self.shift) + tmin = min(tmin,var.time_bnds.min()) + tmax = max(tmax,var.time_bnds.max()) + if ((var.time_bnds.max() < initial_time - self.shift) or + (var.time_bnds.min() > final_time - self.shift)): continue + if lats is not None and var.ndata: + r = np.sqrt((lats[:,np.newaxis]-var.lat)**2 + + (lons[:,np.newaxis]-var.lon)**2) + imin = r.argmin(axis=1) + rmin = r. min(axis=1) + imin = imin[np.where(rmin<1.0)] + if imin.size == 0: + logger.debug("[%s] Could not find [%s] at the input sites in the model results" % (self.name,",".join(altvars))) + raise il.VarNotInModel() + var.lat = var.lat [ imin] + var.lon = var.lon [ imin] + var.data = var.data[:,imin] + var.ndata = var.data.shape[1] + if lats is not None and var.spatial: var = var.extractDatasites(lats,lons) + var.time += self.shift + var.time_bnds += self.shift + V.append(var) + if len(V) > 0: break + + # If we didn't find any files, try to put together the + # variable from a given expression + if len(V) == 0: + if expression is not None: + v = self.derivedVariable(variable, + expression, + lats = lats, + lons = lons, + initial_time = initial_time, + final_time = final_time) + else: + tstr = "" + if tmin < tmax: tstr = " in the given time frame, tinput = [%.1f,%.1f], tmodel = [%.1f,%.1f]" % (initial_time,final_time,tmin+self.shift,tmax+self.shift) + logger.debug("[%s] Could not find [%s] in the model results%s" % (self.name,",".join(altvars),tstr)) + raise il.VarNotInModel() + else: + v = il.CombineVariables(V) + + + return v + + def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_time=-1e20,final_time=1e20): + """Creates a variable from an algebraic expression of variables in the model results. 
+ + Parameters + ---------- + variable_name : str + name of the variable to create + expression : str + an algebraic expression describing how to combine model outputs + initial_time : float, optional + include model results occurring after this time + final_time : float, optional + include model results occurring before this time + lats : numpy.ndarray, optional + a 1D array of latitude locations at which to extract information + lons : numpy.ndarray, optional + a 1D array of longitude locations at which to extract information + + Returns + ------- + var : ILAMB.Variable.Variable + the new variable + + """ + from sympy import sympify + if expression is None: raise il.VarNotInModel() + args = {} + units = {} + unit = expression + mask = None + time = None + tbnd = None + lat = None + lon = None + ndata = None + area = None + depth = None + dbnds = None + + for arg in sympify(expression).free_symbols: + + var = self.extractTimeSeries(arg.name, + lats = lats, + lons = lons, + initial_time = initial_time, + final_time = final_time) + units[arg.name] = var.unit + args [arg.name] = var.data.data + + if mask is None: + mask = var.data.mask + else: + mask += var.data.mask + if time is None: + time = var.time + else: + assert(np.allclose(time,var.time)) + if tbnd is None: + tbnd = var.time_bnds + else: + assert(np.allclose(tbnd,var.time_bnds)) + if lat is None: + lat = var.lat + else: + assert(np.allclose(lat,var.lat)) + if lon is None: + lon = var.lon + else: + assert(np.allclose(lon,var.lon)) + if area is None: + area = var.area + else: + assert(np.allclose(area,var.area)) + if ndata is None: + ndata = var.ndata + else: + assert(np.allclose(ndata,var.ndata)) + if depth is None: + depth = var.depth + else: + assert(np.allclose(depth,var.depth)) + if dbnds is None: + dbnds = var.depth_bnds + else: + assert(np.allclose(dbnds,var.depth_bnds)) + + np.seterr(divide='ignore',invalid='ignore') + result,unit = il.SympifyWithArgsUnits(expression,args,units) + np.seterr(divide='raise',invalid='raise') + mask += np.isnan(result) + result = np.ma.masked_array(np.nan_to_num(result),mask=mask) + + return Variable(data = np.ma.masked_array(result,mask=mask), + unit = unit, + name = variable_name, + time = time, + time_bnds = tbnd, + lat = lat, + lon = lon, + area = area, + ndata = ndata, + depth = depth, + depth_bnds = dbnds) diff --git a/src/ILAMB/Post.py b/src/ILAMB/Post.py new file mode 100644 index 00000000..809b154d --- /dev/null +++ b/src/ILAMB/Post.py @@ -0,0 +1,1218 @@ +import pylab as plt +import numpy as np +from constants import space_opts,time_opts +from Regions import Regions +import re + +def UseLatexPltOptions(fsize=18): + params = {'axes.titlesize':fsize, + 'axes.labelsize':fsize, + 'font.size':fsize, + 'legend.fontsize':fsize, + 'xtick.labelsize':fsize, + 'ytick.labelsize':fsize} + plt.rcParams.update(params) + +def UnitStringToMatplotlib(unit,add_carbon=False): + # replace 1e-6 with micro + match = re.findall("(1e-6\s)",unit) + for m in match: unit = unit.replace(m,"$\mu$") + # raise exponents using Latex + match = re.findall("(-\d)",unit) + for m in match: unit = unit.replace(m,"$^{%s}$" % m) + # add carbon symbol to all mass units + if add_carbon: + match = re.findall("(\D*g)",unit) + for m in match: unit = unit.replace(m,"%s C " % m) + return unit + +def ColorBar(ax,**keywords): + """Plot a colorbar. + + We plot colorbars separately so they can be rendered once and used + for multiple plots. 
+ + Parameters + ---------- + ax : matplotlib.axes._subplots.AxesSubplot + the matplotlib axes object onto which you wish to plot the variable + vmin : float, optional + the minimum plotted value + vmax : float, optional + the maximum plotted value + cmap : str, optional + the name of the colormap to be used in plotting the spatial variable + label : str, optional + the text which appears with the colorbar + + """ + from matplotlib import colorbar,colors + vmin = keywords.get("vmin",None) + vmax = keywords.get("vmax",None) + cmap = keywords.get("cmap","jet") + ticks = keywords.get("ticks",None) + ticklabels = keywords.get("ticklabels",None) + label = keywords.get("label",None) + cb = colorbar.ColorbarBase(ax,cmap=cmap, + norm=colors.Normalize(vmin=vmin,vmax=vmax), + orientation='horizontal') + cb.set_label(label) + if ticks is not None: cb.set_ticks(ticks) + if ticklabels is not None: cb.set_ticklabels(ticklabels) + +def TaylorDiagram(stddev,corrcoef,refstd,fig,colors,normalize=True): + """Plot a Taylor diagram. + + This is adapted from the code by Yannick Copin found here: + + https://gist.github.com/ycopin/3342888 + + Parameters + ---------- + stddev : numpy.ndarray + an array of standard deviations + corrcoeff : numpy.ndarray + an array of correlation coefficients + refstd : float + the reference standard deviation + fig : matplotlib figure + the matplotlib figure + colors : array + an array of colors for each element of the input arrays + normalize : bool, optional + disable to skip normalization of the standard deviation + + """ + from matplotlib.projections import PolarAxes + import mpl_toolkits.axisartist.floating_axes as FA + import mpl_toolkits.axisartist.grid_finder as GF + + # define transform + tr = PolarAxes.PolarTransform() + + # correlation labels + rlocs = np.concatenate((np.arange(10)/10.,[0.95,0.99])) + tlocs = np.arccos(rlocs) + gl1 = GF.FixedLocator(tlocs) + tf1 = GF.DictFormatter(dict(zip(tlocs,map(str,rlocs)))) + + # standard deviation axis extent + if normalize: + stddev = stddev/refstd + refstd = 1. 
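    # [Editor's note] Illustrative sketch only, not part of this patch: one common way
    # to build the inputs of TaylorDiagram, assuming `ref` and `model_fields` are
    # hypothetical numpy arrays on a common grid. With normalize=True (handled just
    # above) each model is plotted at radius stddev/refstd, so the reference star sits
    # at radius 1.
    #
    #     stddev   = np.array([m.std() for m in model_fields])
    #     corrcoef = np.array([np.corrcoef(ref.flatten(), m.flatten())[0,1]
    #                          for m in model_fields])
    #     refstd   = ref.std()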
+ smin = 0 + smax = max(2.0,1.1*stddev.max()) + + # add the curvilinear grid + ghelper = FA.GridHelperCurveLinear(tr, + extremes=(0,np.pi/2,smin,smax), + grid_locator1=gl1, + tick_formatter1=tf1) + ax = FA.FloatingSubplot(fig, 111, grid_helper=ghelper) + fig.add_subplot(ax) + + # adjust axes + ax.axis["top"].set_axis_direction("bottom") + ax.axis["top"].toggle(ticklabels=True,label=True) + ax.axis["top"].major_ticklabels.set_axis_direction("top") + ax.axis["top"].label.set_axis_direction("top") + ax.axis["top"].label.set_text("Correlation") + ax.axis["left"].set_axis_direction("bottom") + if normalize: + ax.axis["left"].label.set_text("Normalized standard deviation") + else: + ax.axis["left"].label.set_text("Standard deviation") + ax.axis["right"].set_axis_direction("top") + ax.axis["right"].toggle(ticklabels=True) + ax.axis["right"].major_ticklabels.set_axis_direction("left") + ax.axis["bottom"].set_visible(False) + ax.grid(True) + + ax = ax.get_aux_axes(tr) + # Plot data + corrcoef = corrcoef.clip(-1,1) + for i in range(len(corrcoef)): + ax.plot(np.arccos(corrcoef[i]),stddev[i],'o',color=colors[i],mew=0,ms=8) + + # Add reference point and stddev contour + l, = ax.plot([0],refstd,'k*',ms=12,mew=0) + t = np.linspace(0, np.pi/2) + r = np.zeros_like(t) + refstd + ax.plot(t,r, 'k--') + + # centralized rms contours + rs,ts = np.meshgrid(np.linspace(smin,smax), + np.linspace(0,np.pi/2)) + rms = np.sqrt(refstd**2 + rs**2 - 2*refstd*rs*np.cos(ts)) + contours = ax.contour(ts,rs,rms,5,colors='k',alpha=0.4) + ax.clabel(contours,fmt='%1.1f') + + + return ax + +class HtmlFigure(): + + def __init__(self,name,pattern,side=None,legend=False,benchmark=False,longname=None): + + self.name = name + self.pattern = pattern + self.side = side + self.legend = legend + self.benchmark = benchmark + self.longname = longname + + def generateClickRow(self,allModels=False): + name = self.pattern + if allModels: name = name.replace(self.name,"PNAME") + for token in ['CNAME','MNAME','RNAME','PNAME']: + name = name.split(token) + name = ("' + %s + '" % token).join(name) + name = "'%s'" % name + name = name.replace("'' + ","") + code = """ + document.getElementById('%s').src = %s""" % (self.name,name) + if self.benchmark: + name = self.pattern.replace('MNAME','Benchmark') + for token in ['CNAME','MNAME','RNAME']: + name = name.split(token) + name = ("' + %s + '" % token).join(name) + name = "'%s'" % name + name = name.replace("'' + ","") + code += """ + document.getElementById('benchmark_%s').src = %s""" % (self.name,name) + return code + + def __str__(self): + + code = """ +
    +
    """ % (self.name) + if self.side is not None: + code += """ +
    %s
    """ % (self.side.replace(" "," ")) + code += """ + Data not available""" % (self.name) + if self.legend: + code += """ +
    Data not available
    """ % (self.name.replace("benchmark_","")) + code += """ +
    +
    """ + return code + + +class HtmlPage(object): + + def __init__(self,name,title): + self.name = name + self.title = title + self.cname = "" + self.pages = [] + self.metric_dict = None + self.models = None + self.regions = None + self.metrics = None + self.units = None + self.priority = ["original","Model","intersection","complement","Benchmark","Bias","RMSE","Phase","Seasonal","Spatial","Interannual","Score","Overall"] + self.header = "CNAME" + self.sections = [] + self.figures = {} + self.text = None + self.inserts = [] + + def __str__(self): + + r = Regions() + def _sortFigures(figure): + macro = ["timeint","bias","rmse","iav","phase","shift","variance","spaceint","accumulate","cycle"] + val = 1. + for i,m in enumerate(macro): + if m in figure.name: val += 3**i + if figure.name.startswith("benchmark"): val -= 1. + if figure.name.endswith("score"): val += 1. + if figure.name.startswith("legend"): + if "variance" in figure.name: + val += 1. + else: + val = 0. + return val + + code = """ +
    +
    +

    %s

    """ % (self.name,self.name,self.title) + if self.pages: + code += """ +
    +
      """ + for page in self.pages: + opts = "" + if page == self: opts = " class=ui-btn-active ui-state-persist" + code += """ +
    • %s
    • """ % (page.name,opts,page.title) + code += """ +
    """ + code += """ +
    +
    """ + + if self.regions: + code += """ + """ + + if self.models: + code += """ +
    + +
    """ + + if self.metric_dict: code += self.metricsToHtmlTables() + + if self.text is not None: + code += """ + %s""" % self.text + + for section in self.sections: + if len(self.figures[section]) == 0: continue + self.figures[section].sort(key=_sortFigures) + code += """ +

    %s

    """ % section + for figure in self.figures[section]: + if figure.name == "spatial_variance": code += "
    " + code += "%s" % (figure) + code += """ +
    """ + + code += """ +
    """ + return code + + def setHeader(self,header): + self.header = header + + def setSections(self,sections): + + assert type(sections) == type([]) + self.sections = sections + for section in sections: self.figures[section] = [] + + def addFigure(self,section,name,pattern,side=None,legend=False,benchmark=False,longname=None): + + assert section in self.sections + for fig in self.figures[section]: + if fig.name == name: return + self.figures[section].append(HtmlFigure(name,pattern,side=side,legend=legend,benchmark=benchmark,longname=longname)) + + def setMetricPriority(self,priority): + self.priority = priority + + def metricsToHtmlTables(self): + if not self.metric_dict: return "" + regions = self.regions + metrics = self.metrics + units = self.units + cname = self.cname.split(" / ") + if len(cname) == 3: + cname = cname[1].strip() + else: + cname = cname[-1].strip() + html = "" + inserts = self.inserts + j0 = 0 if "Benchmark" in self.models else -1 + score_sig = 2 # number of significant digits used in the score tables + other_sig = 3 # number of significant digits used for non-score quantities + for region in regions: + html += """ +
    + + + + + """ % (self.name,region) + for i,metric in enumerate(metrics): + if i in inserts: html += """ + """ + html += """ + """ % (metric,units[metric]) + html += """ + + + """ + + for j,model in enumerate(self.models): + opts = ' onclick="highlightRow%s(this)"' % (self.name) if j > j0 else '' + html += """ + + %s + [-]""" % (opts,model,opts,cname,model) + for i,metric in enumerate(metrics): + sig = score_sig if "score" in metric.lower() else other_sig + if i in inserts: html += """ + """ % (opts) + add = "" + try: + add = ("%#." + "%d" % sig + "g") % self.metric_dict[model][region][metric].data + add = add.lower().replace("nan","") + except: + pass + html += """ + %s""" % (opts,add) + html += """ + """ + html += """ + +
    Download Data
    %s [%s]
    +
    """ + + return html + + def googleScript(self): + if not self.metric_dict: return "" + models = self.models + regions = self.regions + metrics = self.metrics + units = self.units + cname = self.cname.split(" / ") + if len(cname) == 3: + cname = cname[1].strip() + else: + cname = cname[-1].strip() + + + + rows = "" + for section in self.sections: + for figure in self.figures[section]: + rows += figure.generateClickRow() + + head = """ + + function updateImagesAndHeaders%s(){ + var rsel = document.getElementById("%sRegion"); + var msel = document.getElementById("%sModel"); + var rid = rsel.selectedIndex; + var mid = msel.selectedIndex; + var RNAME = rsel.options[rid].value; + var MNAME = msel.options[mid].value; + var CNAME = "%s"; + var head = "%s"; + head = head.replace("CNAME",CNAME).replace("RNAME",RNAME).replace("MNAME",MNAME); + $("#%sHead").text(head); + %s + }""" % (self.name,self.name,self.name,self.cname,self.header,self.name,rows) + + nscores = len(metrics) + if len(self.inserts) > 0: nscores -= self.inserts[-1] + r0 = 2 if "Benchmark" in models else 1 + + head += """ + + function highlightRow%s(cell) { + var select = document.getElementById("%sRegion"); + for (var i = 0; i < select.length; i++){ + var table = document.getElementById("%s_table_" + select.options[i].value); + var rows = table.getElementsByTagName("tr"); + for (var r = %d; r < rows.length; r++) { + for (var c = 0; c < rows[r].cells.length-%d; c++) { + rows[r].cells[c].style.backgroundColor = "#ffffff"; + } + } + var r = cell.closest("tr").rowIndex; + document.getElementById("%sModel").selectedIndex = r-1; + for (var c = 0; c < rows[r].cells.length-%d; c++) { + rows[r].cells[c].style.backgroundColor = "#c1c1c1"; + } + } + updateImagesAndHeaders%s(); + }""" % (self.name,self.name,self.name,r0,nscores+1,self.name,nscores+1,self.name) + + head += """ + + function paintScoreCells%s(RNAME) { + var colors = ['#fb6a4a','#fc9272','#fcbba1','#fee0d2','#fff5f0','#f7fcf5','#e5f5e0','#c7e9c0','#a1d99b','#74c476']; + var table = document.getElementById("%s_table_" + RNAME); + var rows = table.getElementsByTagName("tr"); + for (var c = rows[0].cells.length-%d; c < rows[0].cells.length; c++) { + var scores = []; + for (var r = %d; r < rows.length; r++) { + val = rows[r].cells[c].innerHTML; + if (val=="") { + scores[r-%d] = 0; + }else{ + scores[r-%d] = parseFloat(val); + } + } + var mean = math.mean(scores); + var std = math.max(0.02,math.std(scores)); + for (var r = %d; r < rows.length; r++) { + scores[r-%d] = (scores[r-%d]-mean)/std; + } + var smax = math.max(scores); + var smin = math.min(scores); + if (math.abs(smax-smin) < 1e-12) { + smin = -1.0; + smax = 1.0; + } + for (var r = %d; r < rows.length; r++) { + var clr = math.round((scores[r-%d]-smin)/(smax-smin)*10); + clr = math.min(9,math.max(0,clr)); + rows[r].cells[c].style.backgroundColor = colors[clr]; + } + } + }""" % (self.name,self.name,nscores,r0,r0,r0,r0,r0,r0,r0,r0) + + head += """ + + function pageLoad%s() { + var select = document.getElementById("%sRegion"); + var region = getQueryVariable("region"); + var model = getQueryVariable("model"); + if (region) { + for (var i = 0; i < select.length; i++){ + if (select.options[i].value == region) select.selectedIndex = i; + } + } + var table = document.getElementById("%s_table_" + select.options[select.selectedIndex].value); + var rows = table.getElementsByTagName("tr"); + if (model) { + for (var r = 0; r < rows.length; r++) { + if(rows[r].cells[0].innerHTML==model) highlightRow%s(rows[r].cells[0]); + } + }else{ + 
highlightRow%s(rows[%d]); + } + for (var i = 0; i < select.length; i++){ + paintScoreCells%s(select.options[i].value); + } + changeRegion%s(); + } + + function changeRegion%s() { + var select = document.getElementById("%sRegion"); + for (var i = 0; i < select.length; i++){ + RNAME = select.options[i].value; + if (i == select.selectedIndex) { + document.getElementById("%s_table_" + RNAME).style.display = "table"; + }else{ + document.getElementById("%s_table_" + RNAME).style.display = "none"; + } + } + updateImagesAndHeaders%s(); + }""" % (self.name,self.name,self.name,self.name,self.name,r0,self.name,self.name,self.name,self.name,self.name,self.name,self.name) + + return head,"pageLoad%s" % self.name,"" + + def setRegions(self,regions): + assert type(regions) == type([]) + self.regions = regions + + def setMetrics(self,metric_dict): + + # Sorting function + def _sortMetrics(name,priority=self.priority): + val = 1. + for i,pname in enumerate(priority): + if pname in name: val += 2**i + return val + + assert type(metric_dict) == type({}) + self.metric_dict = metric_dict + + # Build and sort models, regions, and metrics + models = self.metric_dict.keys() + regions = [] + metrics = [] + units = {} + for model in models: + for region in self.metric_dict[model].keys(): + if region not in regions: regions.append(region) + for metric in self.metric_dict[model][region].keys(): + units[metric] = self.metric_dict[model][region][metric].unit + if metric not in metrics: metrics.append(metric) + models.sort(key=lambda key: key.lower()) + if "Benchmark" in models: models.insert(0,models.pop(models.index("Benchmark"))) + regions.sort() + metrics.sort(key=_sortMetrics) + self.models = models + if self.regions is None: self.regions = regions + self.metrics = metrics + self.units = units + + tmp = [("bias" in m.lower()) for m in metrics] + if tmp.count(True) > 0: self.inserts.append(tmp.index(True)) + tmp = [("score" in m.lower()) for m in metrics] + if tmp.count(True) > 0: self.inserts.append(tmp.index(True)) + + def head(self): + return "" + +class HtmlAllModelsPage(HtmlPage): + + def __init__(self,name,title): + + super(HtmlAllModelsPage,self).__init__(name,title) + self.plots = None + self.nobench = None + self.nolegend = [] + + def _populatePlots(self): + + self.plots = [] + bench = [] + for page in self.pages: + if page.sections is not None: + for section in page.sections: + if len(page.figures[section]) == 0: continue + for figure in page.figures[section]: + if (figure.name in ["spatial_variance","compcycle","profile", + "legend_spatial_variance","legend_compcycle"]): continue # ignores + if "benchmark" in figure.name: + if figure.name not in bench: bench.append(figure.name) + continue + if figure not in self.plots: self.plots.append(figure) + if not figure.legend: self.nolegend.append(figure.name) + self.nobench = [plot.name for plot in self.plots if "benchmark_%s" % (plot.name) not in bench] + + def __str__(self): + + if self.plots is None: self._populatePlots() + r = Regions() + + code = """ +
    +
    +

    %s

    """ % (self.name,self.name,self.title) + if self.pages: + code += """ +
    +
      """ + for page in self.pages: + opts = "" + if page == self: opts = " class=ui-btn-active ui-state-persist" + code += """ +
    • %s
    • """ % (page.name,opts,page.title) + code += """ +
    """ + code += """ +
    +
    """ + + if self.regions: + code += """ + """ + + if self.plots: + code += """ + """ + + fig = self.plots[0] + rem_side = fig.side + fig.side = "MNAME" + rem_leg = fig.legend + fig.legend = True + img = "%s" % (fig) + img = img.replace('"leg"','"MNAME_legend"').replace("%s" % fig.name,"MNAME") + fig.side = rem_side + fig.legend = rem_leg + for model in self.pages[0].models: + code += img.replace("MNAME",model) + + if self.text is not None: + code += """ + %s""" % self.text + + code += """ +
    """ + return code + + def googleScript(self): + head = self.head() + return head,"","" + + def head(self): + + if self.plots is None: self._populatePlots() + + models = self.pages[0].models + regions = self.regions + try: + regions.sort() + except: + pass + head = """ + function AllSelect() { + var header = "%s"; + var CNAME = "%s"; + header = header.replace("CNAME",CNAME); + var rid = document.getElementById("%s").selectedIndex; + var RNAME = document.getElementById("%s").options[rid].value; + var pid = document.getElementById("%s").selectedIndex; + var PNAME = document.getElementById("%s").options[pid].value; + header = header.replace("RNAME",RNAME); + $("#%sHead").text(header);""" % (self.header,self.cname,self.name+"Region",self.name+"Region",self.name+"Plot",self.name+"Plot",self.name) + cond = " || ".join(['PNAME == "%s"' % n for n in self.nobench]) + if cond == "": cond = "0" + head += """ + if(%s){ + document.getElementById("Benchmark_div").style.display = 'none'; + }else{ + document.getElementById("Benchmark_div").style.display = 'inline'; + }""" % (cond) + + cond = " || ".join(['PNAME == "%s"' % n for n in self.nolegend]) + if cond == "": cond = "0" + head += """ + if(%s){""" % cond + for model in models: + head += """ + document.getElementById("%s_legend").style.display = 'none';""" % model + head += """ + }else{""" + for model in models: + head += """ + document.getElementById("%s_legend").style.display = 'inline';""" % model + head += """ + }""" + for model in models: + head += """ + document.getElementById('%s').src = '%s_' + RNAME + '_' + PNAME + '.png'; + document.getElementById('%s_legend').src = 'legend_' + PNAME + '.png';""" % (model,model,model) + head += """ + } + + $(document).on('pageshow', '[data-role="page"]', function(){ + AllSelect() + });""" + return head + +class HtmlSitePlotsPage(HtmlPage): + + def __init__(self,name,title): + + super(HtmlSitePlotsPage,self).__init__(name,title) + + def __str__(self): + + # setup page navigation + code = """ +
    +
    +

    %s

    """ % (self.name,self.name,self.title) + if self.pages: + code += """ +
    +
      """ + for page in self.pages: + opts = "" + if page == self: opts = " class=ui-btn-active ui-state-persist" + code += """ +
    • %s
    • """ % (page.name,opts,page.title) + code += """ +
    """ + code += """ +
    +
    """ + + code += """ + """ + + code += """ + """ + + code += """ +
    +
    +
    Data not available
    +
    """ + + code += """ +
    """ + + return code + + def setMetrics(self,metric_dict): + self.models.sort() + + def googleScript(self): + + callback = "%sMap()" % (self.name) + head = """ + function %sMap() { + var sitedata = google.visualization.arrayToDataTable( + [['Latitude', 'Longitude', '%s [%s]'],\n""" % (self.name,self.vname,self.unit) + + for lat,lon,val in zip(self.lat,self.lon,self.vals): + if val is np.ma.masked: + sval = "null" + else: + sval = "%.2f" % val + head += " [%.3f,%.3f,%s],\n" % (lat,lon,sval) + head = head[:-2] + "]);\n" + head += (" var names = %s;" % (self.sites)).replace("u'","'").replace(", '",",'") + head += """ + var options = { + dataMode: 'markers', + magnifyingGlass: {enable: true, zoomFactor: 3.}, + }; + var container = document.getElementById('map_canvas'); + var geomap = new google.visualization.GeoChart(container); + function updateMap() { + var mid = document.getElementById("%sModel").selectedIndex; + var MNAME = document.getElementById("%sModel").options[mid].value; + var rid = document.getElementById("%sSite" ).selectedIndex; + var RNAME = document.getElementById("%sSite" ).options[rid].value; + document.getElementById('time').src = MNAME + '_' + RNAME + '_time.png'; + } + function clickMap() { + var select = geomap.getSelection(); + if (Object.keys(select).length == 1) { + var site = $("select#SitePlotsSite"); + site[0].selectedIndex = select[0].row; + site.selectmenu('refresh'); + } + updateMap(); + } + google.visualization.events.addListener(geomap,'select',clickMap); + geomap.draw(sitedata, options); + updateMap(); + };""" % (self.name,self.name,self.name,self.name) + + return head,callback,"geomap" + + def head(self): + return "" + +class HtmlLayout(): + + def __init__(self,pages,cname,years=None): + + self.pages = pages + self.cname = cname.replace("/"," / ") + if years is not None: + try: + self.cname += " / %d-%d" % (years) + except: + pass + for page in self.pages: + page.pages = self.pages + page.cname = self.cname + + def __str__(self): + code = """ + """ + + code += """ + + + + + + """ + + functions = [] + callbacks = [] + packages = [] + for page in self.pages: + out = page.googleScript() + if len(out) == 3: + f,c,p = out + if f != "": functions.append(f) + if c != "": callbacks.append(c) + if p != "": packages.append(p) + + code += """ + """ + + code += """ + """ + + max_height = 280 # will be related to max column header length across all pages + code += """ + """ % (max_height,max_height/2-5) + + code += """ + + """ + + ### loop over pages + for page in self.pages: code += "%s" % (page) + + code += """ + +""" + return code + +def RegisterCustomColormaps(): + """Adds the 'stoplight' and 'RdGn' colormaps to matplotlib's database + + """ + import colorsys as cs + + # stoplight colormap + Rd1 = [1.,0.,0.]; Rd2 = Rd1 + Yl1 = [1.,1.,0.]; Yl2 = Yl1 + Gn1 = [0.,1.,0.]; Gn2 = Gn1 + val = 0.65 + Rd1 = cs.rgb_to_hsv(Rd1[0],Rd1[1],Rd1[2]) + Rd1 = cs.hsv_to_rgb(Rd1[0],Rd1[1],val ) + Yl1 = cs.rgb_to_hsv(Yl1[0],Yl1[1],Yl1[2]) + Yl1 = cs.hsv_to_rgb(Yl1[0],Yl1[1],val ) + Gn1 = cs.rgb_to_hsv(Gn1[0],Gn1[1],Gn1[2]) + Gn1 = cs.hsv_to_rgb(Gn1[0],Gn1[1],val ) + p = 0 + level1 = 0.5 + level2 = 0.75 + RdYlGn = {'red': ((0.0 , 0.0 ,Rd1[0]), + (level1-p, Rd2[0],Rd2[0]), + (level1+p, Yl1[0],Yl1[0]), + (level2-p, Yl2[0],Yl2[0]), + (level2+p, Gn1[0],Gn1[0]), + (1.00 , Gn2[0], 0.0)), + + 'green': ((0.0 , 0.0 ,Rd1[1]), + (level1-p, Rd2[1],Rd2[1]), + (level1+p, Yl1[1],Yl1[1]), + (level2-p, Yl2[1],Yl2[1]), + (level2+p, Gn1[1],Gn1[1]), + (1.00 , Gn2[1], 0.0)), + + 'blue': ((0.0 , 0.0 
,Rd1[2]), + (level1-p, Rd2[2],Rd2[2]), + (level1+p, Yl1[2],Yl1[2]), + (level2-p, Yl2[2],Yl2[2]), + (level2+p, Gn1[2],Gn1[2]), + (1.00 , Gn2[2], 0.0))} + plt.register_cmap(name='stoplight', data=RdYlGn) + + # RdGn colormap + val = 0.8 + Rd = cs.rgb_to_hsv(1,0,0) + Rd = cs.hsv_to_rgb(Rd[0],Rd[1],val) + Gn = cs.rgb_to_hsv(0,1,0) + Gn = cs.hsv_to_rgb(Gn[0],Gn[1],val) + RdGn = {'red': ((0.0, 0.0, Rd[0]), + (0.5, 1.0 , 1.0 ), + (1.0, Gn[0], 0.0 )), + 'green': ((0.0, 0.0, Rd[1]), + (0.5, 1.0, 1.0 ), + (1.0, Gn[1], 0.0 )), + 'blue': ((0.0, 0.0, Rd[2]), + (0.5, 1.0, 1.0 ), + (1.0, Gn[2], 0.0 ))} + plt.register_cmap(name='RdGn', data=RdGn) + + +def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=False): + """Creates a summary figure for the benchmark results contained in the + data array. + + Parameters + ---------- + models : list + a list of the model names + variables : list + a list of the variable names + data : numpy.ndarray or numpy.ma.ndarray + data scores whose shape is ( len(variables), len(models) ) + figname : str + the full path of the output file to write + vcolor : list, optional + an array parallel to the variables array containing background + colors for the labels to be displayed on the y-axis. + """ + from mpl_toolkits.axes_grid1 import make_axes_locatable + + # data checks + assert type(models) is type(list()) + assert type(variables) is type(list()) + assert (type(data) is type(np .empty(1)) or + type(data) is type(np.ma.empty(1))) + assert data.shape[0] == len(variables) + assert data.shape[1] == len(models ) + assert type(figname) is type("") + if vcolor is not None: + assert type(vcolor) is type(list()) + assert len(vcolor) == len(variables) + + # define some parameters + nmodels = len(models) + nvariables = len(variables) + maxV = max([len(v) for v in variables]) + maxM = max([len(m) for m in models]) + wpchar = 0.15 + wpcell = 0.19 + hpcell = 0.25 + w = maxV*wpchar + max(4,nmodels)*wpcell + if not rel_only: w += (max(4,nmodels)+1)*wpcell + h = maxM*wpchar + nvariables*hpcell + 1.0 + + bad = 0.5 + if "stoplight" not in plt.colormaps(): RegisterCustomColormaps() + + # plot the variable scores + if rel_only: + fig,ax = plt.subplots(figsize=(w,h),ncols=1,tight_layout=True) + ax = [ax] + else: + fig,ax = plt.subplots(figsize=(w,h),ncols=2,tight_layout=True) + + # absolute score + if not rel_only: + cmap = plt.get_cmap('stoplight') + cmap.set_bad('k',bad) + qc = ax[0].pcolormesh(np.ma.masked_invalid(data[::-1,:]),cmap=cmap,vmin=0,vmax=1,linewidth=0) + div = make_axes_locatable(ax[0]) + fig.colorbar(qc, + ticks=(0,0.25,0.5,0.75,1.0), + format="%g", + cax=div.append_axes("bottom", size="5%", pad=0.05), + orientation="horizontal", + label="Absolute Score") + plt.tick_params(which='both', length=0) + ax[0].xaxis.tick_top() + ax[0].set_xticks (np.arange(nmodels )+0.5) + ax[0].set_xticklabels(models,rotation=90) + ax[0].set_yticks (np.arange(nvariables)+0.5) + ax[0].set_yticklabels(variables[::-1]) + ax[0].tick_params('both',length=0,width=0,which='major') + ax[0].tick_params(axis='y',pad=10) + ax[0].set_xlim(0,nmodels) + ax[0].set_ylim(0,nvariables) + if vcolor is not None: + for i,t in enumerate(ax[0].yaxis.get_ticklabels()): + t.set_backgroundcolor(vcolor[::-1][i]) + + # relative score + i = 0 if rel_only else 1 + np.seterr(invalid='ignore',under='ignore') + data = np.ma.masked_invalid(data) + data.data[data.mask] = 1. + data = np.ma.masked_values(data,1.) 
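    # [Editor's note] Illustrative, not part of this patch: the block below standardizes
    # each variable row across models, Z = (score - row_mean) / max(row_std, 0.02), so a
    # row of absolute scores [0.5, 0.6, 0.7] maps to roughly [-1.22, 0.00, +1.22] and is
    # drawn on the 'RdGn' colormap clipped to [-2, +2].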
+ mean = data.mean(axis=1) + std = data.std (axis=1).clip(0.02) + np.seterr(invalid='ignore',under='ignore') + Z = (data-mean[:,np.newaxis])/std[:,np.newaxis] + Z = np.ma.masked_invalid(Z) + np.seterr(invalid='warn',under='raise') + cmap = plt.get_cmap('RdGn') + cmap.set_bad('k',bad) + qc = ax[i].pcolormesh(Z[::-1],cmap=cmap,vmin=-2,vmax=2,linewidth=0) + div = make_axes_locatable(ax[i]) + fig.colorbar(qc, + ticks=(-2,-1,0,1,2), + format="%+d", + cax=div.append_axes("bottom", size="5%", pad=0.05), + orientation="horizontal", + label="Relative Score") + plt.tick_params(which='both', length=0) + ax[i].xaxis.tick_top() + ax[i].set_xticks(np.arange(nmodels)+0.5) + ax[i].set_xticklabels(models,rotation=90) + ax[i].tick_params('both',length=0,width=0,which='major') + ax[i].set_yticks([]) + ax[i].set_xlim(0,nmodels) + ax[i].set_ylim(0,nvariables) + if rel_only: + ax[i].set_yticks (np.arange(nvariables)+0.5) + ax[i].set_yticklabels(variables[::-1]) + if vcolor is not None: + for i,t in enumerate(ax[i].yaxis.get_ticklabels()): + t.set_backgroundcolor(vcolor[::-1][i]) + + # save figure + fig.savefig(figname) + +def WhittakerDiagram(X,Y,Z,**keywords): + """Creates a Whittaker diagram. + + Parameters + ---------- + X : ILAMB.Variable.Variable + the first independent axis, classically representing temperature + Y : ILAMB.Variable.Variable + the second independent axis, classically representing precipitation + Z : ILAMB.Variable.Variable + the dependent axis + X_plot_unit,Y_plot_unit,Z_plot_unit : str, optional + the string representing the units of the corresponding variable + region : str, optional + the string representing the region overwhich to plot the diagram + X_min,Y_min,Z_min : float, optional + the minimum plotted value of the corresponding variable + X_max,Y_max,Z_max : float, optional + the maximum plotted value of the corresponding variable + X_label,Y_label,Z_label : str, optional + the labels of the corresponding variable + filename : str, optional + the output filename + """ + from mpl_toolkits.axes_grid1 import make_axes_locatable + + # possibly integrate in time + if X.temporal: X = X.integrateInTime(mean=True) + if Y.temporal: Y = Y.integrateInTime(mean=True) + if Z.temporal: Z = Z.integrateInTime(mean=True) + + # convert to plot units + X_plot_unit = keywords.get("X_plot_unit",X.unit) + Y_plot_unit = keywords.get("Y_plot_unit",Y.unit) + Z_plot_unit = keywords.get("Z_plot_unit",Z.unit) + if X_plot_unit is not None: X.convert(X_plot_unit) + if Y_plot_unit is not None: Y.convert(Y_plot_unit) + if Z_plot_unit is not None: Z.convert(Z_plot_unit) + + # flatten data, if any data is masked all the data is masked + mask = (X.data.mask + Y.data.mask + Z.data.mask)==0 + + # mask outside region + from constants import regions as ILAMBregions + region = keywords.get("region","global") + lats,lons = ILAMBregions[region] + mask += (np.outer((X.lat>lats[0])*(X.latlons[0])*(X.lon=lat[:-1])*(var.lat[:,np.newaxis]<=lat[1:])).argmax(axis=1) + cols = ((var.lon[:,np.newaxis]>=lon[:-1])*(var.lon[:,np.newaxis]<=lon[1:])).argmax(axis=1) + else: + # if more globally defined, nearest neighbor is fine + rows = (np.abs(lat[:,np.newaxis]-var.lat)).argmin(axis=0) + cols = (np.abs(lon[:,np.newaxis]-var.lon)).argmin(axis=0) + if var.ndata: return mask[np.ix_(rows,cols)].diagonal() + return mask[np.ix_(rows,cols)] + + def hasData(self,label,var): + """Checks if the ILAMB.Variable has data on the given region. 
+ + Parameters + ---------- + label : str + the unique region identifier + var : ILAMB.Variable.Variable + the variable to which we would like check for data + + Returns + ------- + hasdata : boolean + returns True if variable has data on the given region + """ + axes = range(var.data.ndim) + if var.spatial: axes = axes[:-2] + if var.ndata : axes = axes[:-1] + keep = (var.data.mask == False).any(axis=tuple(axes)) + keep *= (self.getMask(label,var)==False) + if keep.sum() > 0: return True + return False + +if "global" not in Regions().regions: + + # Populate some regions + r = Regions() + r.addRegionLatLonBounds("global","Globe",(-89.999, 89.999),(-179.999, 179.999)) + Regions._regions["global"][3][...] = 0. # ensure global mask is null + + # GFED regions + r.addRegionLatLonBounds("bona","Boreal North America", ( 49.75, 79.75),(-170.25,- 60.25)) + r.addRegionLatLonBounds("tena","Temperate North America", ( 30.25, 49.75),(-125.25,- 66.25)) + r.addRegionLatLonBounds("ceam","Central America", ( 9.75, 30.25),(-115.25,- 80.25)) + r.addRegionLatLonBounds("nhsa","Northern Hemisphere South America",( 0.25, 12.75),(- 80.25,- 50.25)) + r.addRegionLatLonBounds("shsa","Southern Hemisphere South America",(-59.75, 0.25),(- 80.25,- 33.25)) + r.addRegionLatLonBounds("euro","Europe", ( 35.25, 70.25),(- 10.25, 30.25)) + r.addRegionLatLonBounds("mide","Middle East", ( 20.25, 40.25),(- 10.25, 60.25)) + r.addRegionLatLonBounds("nhaf","Northern Hemisphere Africa", ( 0.25, 20.25),(- 20.25, 45.25)) + r.addRegionLatLonBounds("shaf","Southern Hemisphere Africa", (-34.75, 0.25),( 10.25, 45.25)) + r.addRegionLatLonBounds("boas","Boreal Asia", ( 54.75, 70.25),( 30.25, 179.75)) + r.addRegionLatLonBounds("ceas","Central Asia", ( 30.25, 54.75),( 30.25, 142.58)) + r.addRegionLatLonBounds("seas","Southeast Asia", ( 5.25, 30.25),( 65.25, 120.25)) + r.addRegionLatLonBounds("eqas","Equatorial Asia", (-10.25, 10.25),( 99.75, 150.25)) + r.addRegionLatLonBounds("aust","Australia", (-41.25,-10.50),( 112.00, 154.00)) diff --git a/src/ILAMB/Scoreboard.py b/src/ILAMB/Scoreboard.py new file mode 100644 index 00000000..5fceb929 --- /dev/null +++ b/src/ILAMB/Scoreboard.py @@ -0,0 +1,699 @@ +from Confrontation import Confrontation +from ConfNBP import ConfNBP +from ConfTWSA import ConfTWSA +from ConfRunoff import ConfRunoff +from ConfEvapFraction import ConfEvapFraction +from ConfIOMB import ConfIOMB +from ConfDiurnal import ConfDiurnal +from ConfPermafrost import ConfPermafrost +import os,re +from netCDF4 import Dataset +import numpy as np +from Post import BenchmarkSummaryFigure +from ilamblib import MisplacedData + +global_print_node_string = "" +global_confrontation_list = [] +global_model_list = [] + +class Node(object): + + def __init__(self, name): + self.name = name + self.children = [] + self.parent = None + self.source = None + self.cmap = None + self.variable = None + self.alternate_vars = None + self.derived = None + self.land = False + self.confrontation = None + self.output_path = None + self.bgcolor = "#EDEDED" + self.table_unit = None + self.plot_unit = None + self.space_mean = True + self.relationships = None + self.ctype = None + self.regions = None + self.skip_rmse = False + self.skip_iav = False + self.mass_weighting = False + self.weight = 1 # if a dataset has no weight specified, it is implicitly 1 + self.sum_weight_children = 0 # what is the sum of the weights of my children? 
+        self.normalize_weight    = 0 # my weight relative to my siblings
+        self.overall_weight      = 0 # the product of my normalized weight and all my parents' normalized weights
+        self.score               = 0 # placeholder
+
+    def __str__(self):
+        if self.parent is None: return ""
+        name   = self.name if self.name is not None else ""
+        weight = self.weight
+        if self.isLeaf():
+            s = "%s%s %s" % (" "*(self.getDepth()-1),name,self.score)
+        else:
+            s = "%s%s %s" % (" "*(self.getDepth()-1),name,self.score)
+        return s
+
+    def isLeaf(self):
+        if len(self.children) == 0: return True
+        return False
+
+    def addChild(self, node):
+        node.parent = self
+        self.children.append(node)
+
+    def getDepth(self):
+        depth  = 0
+        parent = self.parent
+        while parent is not None:
+            depth += 1
+            parent = parent.parent
+        return depth
+
+def TraversePostorder(node,visit):
+    for child in node.children: TraversePostorder(child,visit)
+    visit(node)
+
+def TraversePreorder(node,visit):
+    visit(node)
+    for child in node.children: TraversePreorder(child,visit)
+
+def PrintNode(node):
+    global global_print_node_string
+    global_print_node_string += "%s\n" % (node)
+
+def ConvertTypes(node):
+    def _to_bool(a):
+        if type(a) is type(True): return a
+        if type(a) is type("")  : return a.lower() == "true"
+    node.weight     = float(node.weight)
+    node.land       = _to_bool(node.land)
+    node.space_mean = _to_bool(node.space_mean)
+    if node.regions       is not None: node.regions       = node.regions.split(",")
+    if node.relationships is not None: node.relationships = node.relationships.split(",")
+    if node.alternate_vars is not None:
+        node.alternate_vars = node.alternate_vars.split(",")
+    else:
+        node.alternate_vars = []
+
+def SumWeightChildren(node):
+    for child in node.children: node.sum_weight_children += child.weight
+
+def NormalizeWeights(node):
+    if node.parent is not None:
+        sumw = 1.
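        # [Editor's note] Illustrative, not part of this patch: a node's weight is divided
        # by the sum of its siblings' weights below, e.g. children with weights [1, 2, 1]
        # get normalize_weight values [0.25, 0.50, 0.25]; OverallWeights then chains these
        # products down to the leaves.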
+ if node.parent.sum_weight_children > 0: sumw = node.parent.sum_weight_children + node.normalize_weight = node.weight/sumw + +def OverallWeights(node): + if node.isLeaf(): + node.overall_weight = node.normalize_weight + parent = node.parent + while parent.parent is not None: + node.overall_weight *= parent.normalize_weight + parent = parent.parent + +def InheritVariableNames(node): + if node.parent is None: return + if node.variable is None: node.variable = node.parent.variable + if node.derived is None: node.derived = node.parent.derived + if node.cmap is None: node.cmap = node.parent.cmap + if node.ctype is None: node.ctype = node.parent.ctype + if node.skip_rmse is False: node.skip_rmse = node.parent.skip_rmse + if node.skip_iav is False: node.skip_iav = node.parent.skip_iav + if node.mass_weighting is False: node.mass_weighting = node.parent.mass_weighting + node.alternate_vars = node.parent.alternate_vars + +def ParseScoreboardConfigureFile(filename): + root = Node(None) + previous_node = root + current_level = 0 + for line in file(filename).readlines(): + line = line.strip() + if line.startswith("#"): continue + m1 = re.search(r"\[h(\d):\s+(.*)\]",line) + m2 = re.search(r"\[(.*)\]",line) + m3 = re.search(r"(.*)=(.*)",line) + if m1: + level = int(m1.group(1)) + assert level-current_level<=1 + name = m1.group(2) + node = Node(name) + if level == current_level: + previous_node.parent.addChild(node) + elif level > current_level: + previous_node.addChild(node) + current_level = level + else: + addto = root + for i in range(level-1): addto = addto.children[-1] + addto.addChild(node) + current_level = level + previous_node = node + + if not m1 and m2: + node = Node(m2.group(1)) + previous_node.addChild(node) + + if m3: + keyword = m3.group(1).strip() + value = m3.group(2).strip().replace('"','') + #if keyword not in node.__dict__.keys(): continue + try: + node.__dict__[keyword] = value + except: + pass + + TraversePreorder (root,ConvertTypes) + TraversePostorder(root,SumWeightChildren) + TraversePreorder (root,NormalizeWeights) + TraversePreorder (root,OverallWeights) + TraversePostorder(root,InheritVariableNames) + return root + + +ConfrontationTypes = { None : Confrontation, + "ConfNBP" : ConfNBP, + "ConfTWSA" : ConfTWSA, + "ConfRunoff" : ConfRunoff, + "ConfEvapFraction": ConfEvapFraction, + "ConfIOMB" : ConfIOMB, + "ConfDiurnal" : ConfDiurnal, + "ConfPermafrost" : ConfPermafrost} + +class Scoreboard(): + """ + A class for managing confrontations + """ + def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None,rel_only=False): + + if not os.environ.has_key('ILAMB_ROOT'): + raise ValueError("You must set the environment variable 'ILAMB_ROOT'") + self.build_dir = build_dir + self.rel_only = rel_only + + if (master and not os.path.isdir(self.build_dir)): os.mkdir(self.build_dir) + + self.tree = ParseScoreboardConfigureFile(filename) + max_name_len = 45 + + def _initConfrontation(node): + if not node.isLeaf(): return + + # if the user hasn't set regions, use the globally defined ones + if node.regions is None: node.regions = regions + + # pick the confrontation to use, is it a built-in confrontation? 
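            # [Editor's note] Illustrative, not part of this patch: the configure file
            # parsed by ParseScoreboardConfigureFile above uses [hN: ...] headings,
            # [DatasetName] sections, and key = value pairs; the names and paths in this
            # sketch are hypothetical.
            #
            #     [h1: Ecosystem and Carbon Cycle]
            #     bgcolor  = "#ECFFE6"
            #
            #     [h2: Gross Primary Productivity]
            #     variable = "gpp"
            #
            #     [FLUXNET]
            #     source   = "DATA/gpp/FLUXNET/gpp.nc"
            #     weight   = 9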
+ if ConfrontationTypes.has_key(node.ctype): + Constructor = ConfrontationTypes[node.ctype] + else: + # try importing the confrontation + conf = __import__(node.ctype) + Constructor = conf.__dict__[node.ctype] + + try: + if node.cmap is None: node.cmap = "jet" + node.source = os.path.join(os.environ["ILAMB_ROOT"],node.source) + node.confrontation = Constructor(**(node.__dict__)) + node.confrontation.extents = extents + + if verbose and master: print (" {0:>%d}\033[92m Initialized\033[0m" % max_name_len).format(node.confrontation.longname) + + except MisplacedData: + + if (master and verbose): + longname = node.output_path + longname = longname.replace("//","/").replace(self.build_dir,"") + if longname[-1] == "/": longname = longname[:-1] + longname = "/".join(longname.split("/")[1:]) + print (" {0:>%d}\033[91m MisplacedData\033[0m" % max_name_len).format(longname) + + def _buildDirectories(node): + if node.name is None: return + path = "" + parent = node + while parent.name is not None: + path = os.path.join(parent.name.replace(" ",""),path) + parent = parent.parent + path = os.path.join(self.build_dir,path) + if not os.path.isdir(path) and master: os.mkdir(path) + node.output_path = path + + TraversePreorder(self.tree,_buildDirectories) + TraversePreorder(self.tree,_initConfrontation) + + def __str__(self): + global global_print_node_string + global_print_node_string = "" + TraversePreorder(self.tree,PrintNode) + return global_print_node_string + + def list(self): + def _hasConfrontation(node): + global global_confrontation_list + if node.confrontation is not None: + global_confrontation_list.append(node.confrontation) + global global_confrontation_list + global_confrontation_list = [] + TraversePreorder(self.tree,_hasConfrontation) + return global_confrontation_list + + def createHtml(self,M,filename="index.html"): + + # Create html assets + from pylab import imsave + arrows = np.zeros((32,16,4)) + for i in range(7): + arrows[ 4+i,(7-i):(7+i+1),3] = 1 + arrows[27-i,(7-i):(7+i+1),3] = 1 + imsave("%s/arrows.png" % self.build_dir,arrows) + + # Create a tree for relationship scores (hack) + rel_tree = GenerateRelationshipTree(self,M) + has_rel = np.asarray([len(rel.children) for rel in rel_tree.children]).sum() > 0 + nav = "" + if has_rel: + GenerateRelSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir,rel_only=self.rel_only) + nav = """ +
  • Relationship
  • """ + #global global_print_node_string + #global_print_node_string = "" + #TraversePreorder(rel_tree,PrintNode) + #print global_print_node_string + + from ILAMB.generated_version import version as ilamb_version + html = r""" + + + ILAMB Benchmark Results + + + + + + """ + html += """ + """ + html += """ + + + """ + + html += """ +
    +
    +

    ILAMB Benchmark Results

    + +
    +
    + +
    +
    +
    ILAMB %s
    +
    +
    """ % (nav,ilamb_version) + + if has_rel: + html += """ +
    +
    +

    ILAMB Benchmark Results

    + +
    +
    + +
    +
    +
    +
    """ + + html += """ +
    +
    +

    ILAMB Benchmark Results

    + +
    + +
    +

    Mean State Scores

    + + + + """ % nav + for m in M: + html += """ + """ % m.name + html += """ + + + + """ + + html += GenerateTable(self.tree,M,self) + + html += """ + +
    %s
    +
    """ + + if has_rel: + html += """ +

    Relationship Scores

    + + + + """ + for m in M: + html += """ + """ % m.name + html += """ + + + + """ + html += GenerateTable(rel_tree,M,self,composite=False) + html += """ + +
    %s
    +
    """ + + html += """ +
    +
    +
    + + +""" + file("%s/%s" % (self.build_dir,filename),"w").write(html) + + def createBarCharts(self,M): + html = GenerateBarCharts(self.tree,M) + + def createSummaryFigure(self,M): + GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir,rel_only=self.rel_only) + + def dumpScores(self,M,filename): + out = file("%s/%s" % (self.build_dir,filename),"w") + out.write("Variables,%s\n" % (",".join([m.name for m in M]))) + for cat in self.tree.children: + for v in cat.children: + try: + out.write("%s,%s\n" % (v.name,','.join([str(s) for s in v.score]))) + except: + out.write("%s,%s\n" % (v.name,','.join(["~"]*len(M)))) + out.close() + +def CompositeScores(tree,M): + global global_model_list + global_model_list = M + def _loadScores(node): + if node.isLeaf(): + if node.confrontation is None: return + data = np.zeros(len(global_model_list)) + mask = np.ones (len(global_model_list),dtype=bool) + for ind,m in enumerate(global_model_list): + fname = "%s/%s_%s.nc" % (node.confrontation.output_path,node.confrontation.name,m.name) + if os.path.isfile(fname): + try: + dataset = Dataset(fname) + grp = dataset.groups["MeanState"].groups["scalars"] + except: + continue + if grp.variables.has_key("Overall Score global"): + data[ind] = grp.variables["Overall Score global"][0] + mask[ind] = 0 + else: + data[ind] = -999. + mask[ind] = 1 + node.score = np.ma.masked_array(data,mask=mask) + else: + node.score = 0 + sum_weights = 0 + for child in node.children: + node.score += child.score*child.weight + sum_weights += child.weight + np.seterr(over='ignore',under='ignore') + node.score /= sum_weights + np.seterr(over='raise',under='raise') + TraversePostorder(tree,_loadScores) + +global_html = "" +global_table_color = "" + +def DarkenRowColor(clr,fraction=0.9): + from colorsys import rgb_to_hsv,hsv_to_rgb + def hex_to_rgb(value): + value = value.lstrip('#') + lv = len(value) + rgb = tuple(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3)) + rgb = np.asarray(rgb)/255. + return rgb + def rgb_to_hex(rgb): + return '#%02x%02x%02x' % rgb + rgb = hex_to_rgb(clr) + hsv = rgb_to_hsv(rgb[0],rgb[1],rgb[2]) + rgb = hsv_to_rgb(hsv[0],hsv[1],fraction*hsv[2]) + rgb = tuple(np.asarray(np.asarray(rgb)*255.,dtype=int)) + return rgb_to_hex(rgb) + +def BuildHTMLTable(tree,M,build_dir): + global global_model_list + global_model_list = M + global global_table_color + def _genHTML(node): + global global_html + global global_table_color + ccolor = DarkenRowColor(global_table_color,fraction=0.95) + + # setup a html table row + if node.isLeaf(): + row = '' % ccolor + else: + row = '' % global_table_color + + # first table column + tab = '' + if node.isLeaf(): tab = '   ' + name = node.name + if node.confrontation: + conf = node.confrontation + if type(conf) == str: + path = conf.replace(build_dir,"").lstrip("/") + else: + path = os.path.join(conf.output_path.replace(build_dir,"").lstrip("/"),conf.name + ".html") + name = '%s' % (path,node.name) + if node.isLeaf(): + row += '%s%s (%.1f%%)' % (tab,name,100*node.normalize_weight) + else: + row += '%s%s' % (tab,name) + + # populate the rest of the columns + if type(node.score) != type(np.ma.empty(0)): node.score = np.ma.masked_array(np.zeros(len(global_model_list)),mask=True) + for i,m in enumerate(global_model_list): + if not node.score.mask[i]: + row += '%.2f' % node.score[i] + else: + row += '~' + + # end the table row + row += '
    ' + global_html += row + + for cat in tree.children: + global_table_color = cat.bgcolor + for var in cat.children: + TraversePreorder(var,_genHTML) + cat.name += " Summary" + _genHTML(cat) + cat.name.replace(" Summary","") + global_table_color = tree.bgcolor + tree.name = "Overall Summary" + _genHTML(tree) + +def GenerateTable(tree,M,S,composite=True): + global global_html + global global_model_list + if composite: CompositeScores(tree,M) + global_model_list = M + global_html = "" + BuildHTMLTable(tree,M,S.build_dir) + return global_html + +def GenerateSummaryFigure(tree,M,filename,rel_only=False): + + models = [m.name for m in M] + variables = [] + vcolors = [] + for cat in tree.children: + for var in cat.children: + variables.append(var.name) + vcolors.append(cat.bgcolor) + + data = np.ma.zeros((len(variables),len(models))) + row = -1 + for cat in tree.children: + for var in cat.children: + row += 1 + if type(var.score) == float: + data[row,:] = np.nan + else: + data[row,:] = var.score + + BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors,rel_only=rel_only) + +def GenerateRelSummaryFigure(S,M,figname,rel_only=False): + + # reorganize the relationship data + scores = {} + counts = {} + rows = [] + vcolors = [] + for h1 in S.children: + for dep in h1.children: + dname = dep.name.split("/")[0] + for ind in dep.children: + iname = ind.name.split("/")[0] + key = "%s/%s" % (dname,iname) + if scores.has_key(key): + scores[key] += ind.score + counts[key] += 1. + else: + scores[key] = np.copy(ind.score) + counts[key] = 1. + rows .append(key) + vcolors.append(h1.bgcolor) + if len(rows) == 0: return + data = np.ma.zeros((len(rows),len(M))) + for i,row in enumerate(rows): + data[i,:] = scores[row] / counts[row] + BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) + +def GenerateRelationshipTree(S,M): + + # Create a tree which mimics the scoreboard for relationships, but + # we need + # + # root -> category -> datasets -> relationships + # + # instead of + # + # root -> category -> variable -> datasets + # + rel_tree = Node("root") + for cat in S.tree.children: + h1 = Node(cat.name) + h1.bgcolor = cat.bgcolor + h1.parent = rel_tree + rel_tree.children.append(h1) + for var in cat.children: + for data in var.children: + if data is None: continue + if data.relationships is None: continue + + # build tree + h2 = Node(data.confrontation.longname) + h1.children.append(h2) + h2.parent = h1 + h2.score = np.ma.masked_array(np.zeros(len(M)),mask=True) + for rel in data.relationships: + try: + longname = rel.longname + except: + longname = rel + v = Node(longname) + h2.children.append(v) + v.parent = h2 + v.score = np.ma.masked_array(np.zeros(len(M)),mask=True) + v.normalize_weight = 1./len(data.relationships) + path = data.confrontation.output_path + path = os.path.join(path,data.confrontation.name + ".html#Relationships") + v.confrontation = path + + # load scores + for i,m in enumerate(M): + fname = os.path.join(data.output_path,"%s_%s.nc" % (data.name,m.name)) + if not os.path.isfile(fname): continue + with Dataset(fname) as dset: + grp = dset.groups["Relationships"]["scalars"] + for rel,v in zip(data.relationships,h2.children): + try: + longname = rel.longname + except: + longname = rel + rs = [key for key in grp.variables.keys() if (longname.split("/")[0] in key and + "global" in key and + "RMSE" in key)] + if len(rs) != 1: continue + v.score[i] = grp.variables[rs[0]][...] 
+ if "Overall Score global" not in grp.variables.keys(): continue + h2.score[i] = grp.variables["Overall Score global"][...] + + return rel_tree + + diff --git a/src/ILAMB/Variable.py b/src/ILAMB/Variable.py new file mode 100644 index 00000000..fcfa33f9 --- /dev/null +++ b/src/ILAMB/Variable.py @@ -0,0 +1,1746 @@ +from constants import spd,dpy,mid_months,bnd_months +from Regions import Regions +from mpl_toolkits.basemap import Basemap +import matplotlib.colors as colors +from pylab import get_cmap +from cf_units import Unit +import ilamblib as il +import Post as post +import numpy as np + +def _shiftLon(lon): + return (lon<=180)*lon + (lon>180)*(lon-360) + (lon<-180)*360 + +class Variable: + r"""A class for managing variables and their analysis. + + There are two ways to create a Variable object. Because python + does not support multiple constructors, we will use keyword + arguments so that the users intent may be captured. The first way + to specify a Variable is by loading a netCDF4 file. You can + achieve this by specifying the 'filename' and 'variable_name' + keywords. The second way is to use the remaining keyword arguments + to specify data arrays directly. If you use the second way, you + must specify the keywords 'data' and 'unit'. The rest are truly + optional and depend on the nature of your data. + + Parameters + ---------- + filename : str, optional + Name of the netCDF4 file from which to extract a variable + variable_name : str, optional + Name of the variable to extract from the netCDF4 file + data : numpy.ndarray, optional + The array which contains the data which constitutes the + variable + unit : str, optional + The unit of the input data + name : str, optional + The name of the variable, will be how it is saved in the netCDF4 + file + time : numpy.ndarray, optional + a 1D array of times in days since 1850-01-01 00:00:00 + time_bnds : numpy.ndarray, optional + a 2D array of time bounds in days since 1850-01-01 00:00:00 + lat : numpy.ndarray, optional + a 1D array of latitudes of cell centroids + lon : numpy.ndarray, optional + a 1D array of longitudes of cell centroids + area : numpy.ndarray, optional + a 2D array of the cell areas + ndata : int, optional + number of data sites this data represents + alternate_vars : list of str, optional + a list of alternate acceptable variable names + depth_bnds : numpy.ndarray, optional + a 2D array representing the boundaries of the cells in the vertical dimension + + Examples + -------- + + You can initiate a Variable by specifying the data directly. + + >>> lat = np.linspace(- 90, 90, 91) + >>> lon = np.linspace(-180,180,181) + >>> data = np.random.rand(91,181) + >>> v = Variable(name="some_variable",unit="some_unit",lat=lat,lon=lon,data=data) + + Or you can initiate a variable by extracting a specific field from a netCDF4 file. + + >>> v = Variable(filename="some_netcdf_file.nc",variable_name="name_of_var_to_extract") + + """ + def __init__(self,**keywords): + r"""Constructor for the variable class by specifying the data arrays. 
+ """ + # See if the user specified a netCDF4 file and variable + filename = keywords.get("filename" ,None) + groupname = keywords.get("groupname" ,None) + variable_name = keywords.get("variable_name",None) + alternate_vars = keywords.get("alternate_vars",[]) + if filename is None: # if not pull data from other arguments + data = keywords.get("data" ,None) + unit = keywords.get("unit" ,None) + name = keywords.get("name" ,"unnamed") + time = keywords.get("time" ,None) + time_bnds = keywords.get("time_bnds" ,None) + lat = keywords.get("lat" ,None) + lat_bnds = keywords.get("lat_bnds" ,None) + lon = keywords.get("lon" ,None) + lon_bnds = keywords.get("lon_bnds" ,None) + depth = keywords.get("depth" ,None) + depth_bnds = keywords.get("depth_bnds" ,None) + ndata = keywords.get("ndata" ,None) + assert data is not None + assert unit is not None + cbounds = None + else: + assert variable_name is not None + t0 = keywords.get("t0",None) + tf = keywords.get("tf",None) + out = il.FromNetCDF4(filename,variable_name,alternate_vars,t0,tf,group=groupname) + data,unit,name,time,time_bnds,lat,lat_bnds,lon,lon_bnds,depth,depth_bnds,cbounds,ndata = out + + if not np.ma.isMaskedArray(data): data = np.ma.masked_array(data) + self.data = data + self.ndata = ndata + self.unit = unit + self.name = name + self.cbounds = cbounds + + def _createBnds(x): + x = np.asarray(x) + x_bnds = np.zeros((x.size,2)) + x_bnds[+1:,0] = 0.5*(x[:-1]+x[+1:]) + x_bnds[:-1,1] = 0.5*(x[:-1]+x[+1:]) + if x.size == 1: + x_bnds[ ...] = x + else: + x_bnds[ 0,0] = x[ 0] - 0.5*(x[ 1]-x[ 0]) + x_bnds[-1,1] = x[-1] + 0.5*(x[-1]-x[-2]) + return x_bnds + + # Handle time data + self.time = time # time data + self.time_bnds = time_bnds # bounds on time + self.temporal = False # flag for temporal data + self.dt = 0. # mean temporal spacing + self.monthly = False # flag for monthly means + if time is not None: + self.temporal = True + if self.time_bnds is None: self.time_bnds = _createBnds(self.time) + self.dt = (self.time_bnds[:,1]-self.time_bnds[:,0]).mean() + if np.allclose(self.dt,30,atol=3): self.monthly = True + assert (2*self.time.size) == (self.time_bnds.size) + + # Handle space or multimember data + self.spatial = False + self.lat = lat + self.lon = lon + self.lat_bnds = lat_bnds + self.lon_bnds = lon_bnds + self.area = keywords.get("area",None) + + # Shift possible values on [0,360] to [-180,180] + if self.lon is not None: self.lon = _shiftLon(self.lon ) + if self.lon_bnds is not None: self.lon_bnds = _shiftLon(self.lon_bnds) + + # If the last dimensions are lat and lon, this is spatial data + if lat is not None and lon is not None and data.ndim >= 2: + if (data.shape[-2] == lat.size and data.shape[-1] == lon.size): self.spatial = True + + if self.spatial is True: + if np.all(np.diff(self.lat)<0): # Flip if monotonically decreasing + self.lat = self.lat [::-1 ] + self.data = self.data[...,::-1,: ] + if self.lat_bnds is not None: self.lat_bnds = self.lat_bnds[::-1,::-1] + if self.area is not None: self.area = self.area [::-1,:] + if self.lat_bnds is None: self.lat_bnds = _createBnds(self.lat) + if self.lon_bnds is None: self.lon_bnds = _createBnds(self.lon) + if self.area is None: self.area = il.CellAreas(self.lat,self.lon) + # Some data arrays are arranged such that the first column + # of data is arranged at the prime meridian. This does not + # work well with some of the plotting and/or analysis + # operations we will need to perform. These require that + # the first column be coincident with the international + # dateline. 
Thus we roll the data the required amount. + shift = self.lon.argmin() + self.lon = np.roll(self.lon ,-shift) + self.lon_bnds = np.roll(self.lon_bnds,-shift,axis= 0) + self.data = np.roll(self.data ,-shift,axis=-1) + self.area = np.roll(self.area ,-shift,axis=-1) + # Fix potential problems with rolling the axes of the lon_bnds + if self.lon_bnds[ 0,0] > self.lon_bnds[ 0,1]: self.lon_bnds[ 0,0] = -180. + if self.lon_bnds[-1,0] > self.lon_bnds[-1,1]: self.lon_bnds[-1,1] = +180. + # Make sure that the value lies within the bounds + assert np.all((self.lat>=self.lat_bnds[:,0])*(self.lat<=self.lat_bnds[:,1])) + assert np.all((self.lon>=self.lon_bnds[:,0])*(self.lon<=self.lon_bnds[:,1])) + + # Is the data layered + self.layered = False + self.depth = depth + self.depth_bnds = depth_bnds + if (data.ndim > (self.temporal + 2*self.spatial + (self.ndata is not None))) and depth is not None: + self.layered = True + if depth_bnds is None: self.depth_bnds = _createBnds(self.depth) + + def __str__(self): + if self.data is None: return "Uninitialized Variable" + if self.ndata is None: + ndata = "N/A" + else: + ndata = str(self.ndata) + if not self.temporal: + time = "" + else: + time = " (%d)" % self.time.size + if not self.spatial: + space = "" + else: + space = " (%d,%d)" % (self.lat.size,self.lon.size) + if not self.layered: + layer = "" + else: + layer = " (%d)" % (self.depth.size) + s = "Variable: %s\n" % self.name + s += "-"*(len(self.name)+10) + "\n" + s += "{0:>20}: ".format("unit") + self.unit + "\n" + s += "{0:>20}: ".format("isTemporal") + str(self.temporal) + time + "\n" + s += "{0:>20}: ".format("isSpatial") + str(self.spatial) + space + "\n" + s += "{0:>20}: ".format("isLayered") + str(self.layered) + layer + "\n" + s += "{0:>20}: ".format("nDatasites") + ndata + "\n" + s += "{0:>20}: ".format("dataShape") + "%s\n" % (self.data.shape,) + np.seterr(over='ignore',under='ignore') + s += "{0:>20}: ".format("dataMax") + "%e\n" % self.data.max() + s += "{0:>20}: ".format("dataMin") + "%e\n" % self.data.min() + s += "{0:>20}: ".format("dataMean") + "%e\n" % self.data.mean() + np.seterr(over='warn',under='warn') + if self.cbounds is not None: + s += "{0:>20}: ".format("climatology") + "%d thru %d\n" % (self.cbounds[0],self.cbounds[1]) + + return s + + def nbytes(self): + r"""Estimate the memory usage of a variable in bytes. + """ + nbytes = 0. + for key in self.__dict__.keys(): + try: + nbytes += self.__dict__[key].nbytes + except: + pass + return nbytes + + def integrateInTime(self,**keywords): + r"""Integrates the variable over a given time period. + + Uses nodal integration to integrate to approximate + + .. math:: \int_{t_0}^{t_f} v(t,\dots)\ dt + + The arguments of the integrand reflect that while it must be + at least defined in time, the remaining arguments are + flexible. If :math:`t_0` or :math:`t_f` are not specified, the + variable will be integrated over the extent of its time + domain. If the mean function value over time is desired, this + routine will approximate + + .. math:: \frac{1}{t_f-t_0} \int_{t_0}^{t_f} v(t,\dots)\ dt + + again by nodal integration. The amount of time which we divide + by is the non-masked amount of time. This means that if a + function has some values masked or marked as invalid, we do + not penalize the average value by including this as a time at + which data is expected. 
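The nodal time integration described here is just a dot product of the data with the widths of the time intervals, and the mean divides by only the unmasked time. A minimal numpy sketch of that bookkeeping, with synthetic monthly values (illustrative only):

    import numpy as np
    time_bnds = np.asarray([[0,31],[31,59],[59,90],[90,120],[120,151],[151,181],
                            [181,212],[212,243],[243,273],[273,304],[304,334],[334,365]],dtype=float)
    dt   = time_bnds[:,1]-time_bnds[:,0]
    data = np.ma.masked_invalid([1.,2.,np.nan,4.,5.,6.,7.,np.nan,9.,10.,11.,12.])
    integral = (data*dt).sum()                    # units of [data]*[d]
    mean     = integral/(dt*(~data.mask)).sum()   # divide only by the unmasked time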
+ + Parameters + ---------- + t0 : float, optional + initial time in days since 1/1/1850 + tf : float, optional + final time in days since 1/1/1850 + mean : boolean, optional + enable to divide the integrand to get the mean function value + + Returns + ------- + integral : ILAMB.Variable.Variable + a Variable instance with the integrated value along with the + appropriate name and unit change + + """ + if not self.temporal: raise il.NotTemporalVariable() + t0 = keywords.get("t0",self.time_bnds[:,0].min()) + tf = keywords.get("tf",self.time_bnds[:,1].max()) + mean = keywords.get("mean",False) + + # find which time bounds are included even partially in the interval [t0,tf] + time_bnds = np.copy(self.time_bnds) + ind = np.where((t0time_bnds[:,0]))[0] + time_bnds[(t0>time_bnds[:,0])*(t0time_bnds[:,0])*(tf 1 and self.data.mask.size > 1: + mask = np.apply_along_axis(np.all,0,self.data.mask[ind]) + integral = np.ma.masked_array(integral,mask=mask,copy=False) + + # handle units + unit = Unit(self.unit) + name = self.name + "_integrated_over_time" + + if mean: + + # divide thru by the non-masked amount of time, the units + # can remain as input because we integrate over time and + # then divide by the time interval in the same units + name += "_and_divided_by_time_period" + if self.data.mask.size > 1: + dt = (dt*(self.data.mask[ind]==0)).sum(axis=0) + else: + dt = dt.sum(axis=0) + np.seterr(over='ignore',under='ignore') + integral = integral / dt + np.seterr(over='raise' ,under='raise' ) + + else: + + # if not a mean, we need to potentially handle unit conversions + unit0 = Unit("d")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) + + return Variable(data = integral, + unit = "%s" % unit, + name = name, + lat = self.lat, + lat_bnds = self.lat_bnds, + lon = self.lon, + lon_bnds = self.lon_bnds, + depth = self.depth, + depth_bnds = self.depth_bnds, + area = self.area, + ndata = self.ndata) + + def integrateInDepth(self,**keywords): + r"""Integrates the variable over a given layer limits. + + Uses nodal integration to integrate to approximate + + .. math:: \int_{z_0}^{z_f} v(z,\dots)\ dz + + The arguments of the integrand reflect that while it must be + at least defined in depth, the remaining arguments are + flexible. If :math:`z_0` or :math:`z_f` are not specified, the + variable will be integrated over the extent of its depth + domain. If the mean function value over depth is desired, this + routine will approximate + + .. math:: \frac{1}{z_f-z_0} \int_{z_0}^{z_f} v(z,\dots)\ dz + + again by nodal integration. The amount of depth which we + divide by is the non-masked amount of depth. This means that + if a function has some values masked or marked as invalid, we + do not penalize the average value by including this as a depth + at which data is expected. 
+ + Parameters + ---------- + z0 : float, optional + initial depth in m + zf : float, optional + final depth in m + mean : boolean, optional + enable to divide the integrand to get the mean function value + + Returns + ------- + integral : ILAMB.Variable.Variable + a Variable instance with the integrated value along with the + appropriate name and unit change + + """ + if not self.layered: raise il.NotLayeredVariable() + z0 = keywords.get("z0",self.depth_bnds[:,0].min()) + zf = keywords.get("zf",self.depth_bnds[:,1].max()) + mean = keywords.get("mean",False) + + # find which time bounds are included even partially in the interval [z0,zf] + depth_bnds = np.copy(self.depth_bnds) + ind = np.where((z0depth_bnds[:,0]))[0] + depth_bnds[(z0>depth_bnds[:,0])*(z0depth_bnds[:,0])*(zf 1 and self.data.mask.size > 1: + mask = np.apply_along_axis(np.all,axis,self.data.mask[ind]) + integral = np.ma.masked_array(integral,mask=mask,copy=False) + + # handle units + unit = Unit(self.unit) + name = self.name + "_integrated_over_depth" + + if mean: + + # divide thru by the non-masked amount of time, the units + # can remain as input because we integrate over time and + # then divide by the time interval in the same units + name += "_and_divided_by_depth" + if self.data.mask.size > 1: + dz = (dz*(self.data.mask[ind]==0)).sum(axis=axis) + else: + dz = dz.sum(axis=axis) + np.seterr(over='ignore',under='ignore') + integral = integral / dz + np.seterr(over='raise' ,under='raise' ) + + else: + + # if not a mean, we need to potentially handle unit conversions + unit0 = Unit("m")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) + + return Variable(data = integral, + unit = "%s" % unit, + name = name, + time = self.time, + time_bnds = self.time_bnds, + lat = self.lat, + lat_bnds = self.lat_bnds, + lon = self.lon, + lon_bnds = self.lon_bnds, + area = self.area, + ndata = self.ndata) + + def integrateInSpace(self,region=None,mean=False,weight=None,intabs=False): + r"""Integrates the variable over a given region. + + Uses nodal integration to integrate to approximate + + .. math:: \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega + + The arguments of the integrand reflect that while it must be + at least defined in space, the remaining arguments are + flexible. The variable :math:`\Omega` represents the desired + region over which we will integrate. If no region is + specified, the variable will be integrated over the extent of + its spatial domain. If the mean function value over time is + desired, this routine will approximate + + .. math:: \frac{1}{A(\Omega)} \int_{\Omega} v(\mathbf{x},\dots)\ d\Omega + + again by nodal integration. The spatial area which we divide + by :math:`A(\Omega)` is the non-masked area of the given + region, also given by + + .. math:: A(\Omega) = \int_{\Omega}\ d\Omega + + This means that if a function has some values masked or marked + as invalid, we do not penalize the average value by including + this as a point at which data is expected. + + We also support the inclusion of an optional weighting + function :math:`w(\mathbf{x})` which is a function of space + only. In this case, we approximate the following integral + + .. math:: \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega + + and if a mean value is desired, + + .. 
math:: \frac{1}{\int_{\Omega} w(\mathbf{x})\ d\Omega} \int_{\Omega} v(\mathbf{x},\dots)w(\mathbf{x})\ d\Omega + + Parameters + ---------- + region : str, optional + name of the region overwhich you wish to integrate + mean : bool, optional + enable to divide the integrand to get the mean function value + weight : numpy.ndarray, optional + a data array of the same shape as this variable's areas + representing an additional weight in the integrand + intabs : bool, optional + enable to integrate the absolute value + + Returns + ------- + integral : ILAMB.Variable.Variable + a Variable instace with the integrated value along with the + appropriate name and unit change. + + """ + def _integrate(var,areas): + op = lambda x : x + if intabs: op = np.abs + assert var.shape[-2:] == areas.shape + np.seterr(over='ignore',under='ignore') + vbar = (op(var)*areas).sum(axis=-1).sum(axis=-1) + np.seterr(over='raise',under='raise') + return vbar + + if not self.spatial: raise il.NotSpatialVariable() + + # determine the measure + mask = self.data.mask + while mask.ndim > 2: mask = np.all(mask,axis=0) + measure = np.ma.masked_array(self.area,mask=mask,copy=True) + if weight is not None: measure *= weight + + # if we want to integrate over a region, we need add to the + # measure's mask + r = Regions() + if region is not None: measure.mask += r.getMask(region,self) + + # approximate the integral + integral = _integrate(self.data,measure) + if mean: + np.seterr(under='ignore') + integral = integral / measure.sum() + np.seterr(under='raise') + + # handle the name and unit + name = self.name + "_integrated_over_space" + if region is not None: name = name.replace("space",region) + unit = Unit(self.unit) + if mean: + + # we have already divided thru by the non-masked area in + # units of m^2, which are the same units of the integrand. + name += "_and_divided_by_area" + else: + + # if not a mean, we need to potentially handle unit conversions + unit0 = Unit("m2")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) + + return Variable(data = np.ma.masked_array(integral), + unit = "%s" % unit, + time = self.time, + time_bnds = self.time_bnds, + depth = self.depth, + depth_bnds = self.depth_bnds, + name = name) + + def siteStats(self,region=None,weight=None,intabs=False): + """Computes the mean and standard deviation of the variable over all data sites. + + Parameters + ---------- + region : str, optional + name of the region overwhich you wish to include stats. + + Returns + ------- + mean : ILAMB.Variable.Variable + a Variable instace with the mean values + + """ + if self.ndata is None: raise il.NotDatasiteVariable() + op = lambda x : x + if intabs: op = np.abs + rem_mask = np.copy(self.data.mask) + rname = "" + r = Regions() + if region is not None: + self.data.mask += r.getMask(region,self) + rname = "_over_%s" % region + np.seterr(over='ignore',under='ignore') + mean = np.ma.average(op(self.data),axis=-1,weights=weight) + np.seterr(over='raise',under='raise') + self.data.mask = rem_mask + return Variable(data = mean, + unit = self.unit, + time = self.time, + time_bnds = self.time_bnds, + depth = self.depth, + depth_bnds = self.depth_bnds, + name = "mean_%s%s" % (self.name,rname)) + + def annualCycle(self): + """Computes mean annual cycle information (climatology) for the variable. + + For each site/cell/depth in the variable, compute the mean annual cycle. 
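The mean annual cycle amounts to reshaping a monthly series into (years,12,...) and averaging over the year axis, which is what the body below does after trimming to whole years. A small sketch with synthetic data (names are illustrative):

    import numpy as np
    nyears = 10
    series = np.sin(2*np.pi*np.arange(nyears*12)/12.) + 0.1*np.random.randn(nyears*12)
    cycle  = series.reshape((-1,12)).mean(axis=0)   # one value per calendar month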
+ + Returns + ------- + mean : ILAMB.Variable.Variable + The annual cycle mean values + """ + if not self.temporal: raise il.NotTemporalVariable() + assert self.monthly + assert self.time.size > 11 + begin = np.argmin(self.time[:11]%365) + end = begin+int(self.time[begin:].size/12.)*12 + shp = (-1,12) + self.data.shape[1:] + v = self.data[begin:end,...].reshape(shp) + np.seterr(over='ignore',under='ignore') + mean = v.mean(axis=0) + np.seterr(over='raise',under='raise') + return Variable(data = mean, + unit = self.unit, + name = "annual_cycle_mean_of_%s" % self.name, + time = mid_months, + time_bnds = np.asarray([bnd_months[:-1],bnd_months[1:]]).T, + lat = self.lat, + lat_bnds = self.lat_bnds, + lon = self.lon, + lon_bnds = self.lon_bnds, + area = self.area, + depth = self.depth, + depth_bnds = self.depth_bnds, + ndata = self.ndata) + + def timeOfExtrema(self,etype="max"): + """Returns the time of the specified extrema. + + Parameters + ---------- + etype : str, optional + The type of extrema to compute, either 'max' or 'min' + + Returns + ------- + extrema : ILAMB.Variable.Variable + The times of the extrema computed + """ + if not self.temporal: raise il.NotTemporalVariable() + fcn = {"max":np.argmax,"min":np.argmin} + assert etype in fcn.keys() + tid = np.apply_along_axis(fcn[etype],0,self.data) + mask = False + if self.data.ndim > 1 and self.data.mask.ndim > 0: mask = np.apply_along_axis(np.all,0,self.data.mask) # mask cells where all data is masked + data = np.ma.masked_array(self.time[tid],mask=mask) + return Variable(data = data, + unit = "d", + name = "time_of_%s_%s" % (etype,self.name), + lat = self.lat, + lat_bnds = self.lat_bnds, + lon = self.lon, + lon_bnds = self.lon_bnds, + area = self.area, + depth = self.depth, + depth_bnds = self.depth_bnds, + ndata = self.ndata) + + def extractDatasites(self,lat,lon): + """Extracts a variable at sites defined by a set of latitude and longitude. + + Parameters + ---------- + lat : numpy.ndarray + an array with the latitude values, must be same size as the longitude values + lon : numpy.ndarray + an array with the longitude values, must be same size as the latitude values + + Returns + ------- + extracted : ILAMB.Variable.Variable + The extracted variables + """ + assert lat.size == lon.size + if not self.spatial: raise il.NotSpatialVariable() + ilat = np.apply_along_axis(np.argmin,1,np.abs(lat[:,np.newaxis]-self.lat)) + ilon = np.apply_along_axis(np.argmin,1,np.abs(lon[:,np.newaxis]-self.lon)) + ndata = lat.size + if self.data.ndim == 2: + data = self.data[ ilat,ilon] + else: + data = self.data[...,ilat,ilon] + return Variable(data = data, + unit = self.unit, + name = self.name, + lat = lat, + lon = lon, + ndata = ndata, + depth = self.depth, + depth_bnds = self.depth_bnds, + time = self.time, + time_bnds = self.time_bnds) + + def spatialDifference(self,var): + """Computes the point-wise difference of two spatially defined variables. + + If the variable is spatial or site data and is defined on the + same grid, this routine will simply compute the difference in + the data arrays. If the variables are spatial but defined on + separate grids, the routine will interpolate both variables to + a composed grid via nearest-neighbor interpolation and then + return the difference. 
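Both extractDatasites and the composed-grid branch described here rest on nearest-neighbor lookups, which reduce to an argmin over the absolute separations in latitude and longitude. A short sketch under the assumption of 1D coordinate arrays (values are made up):

    import numpy as np
    grid_lat = np.linspace(-89.5,89.5,180)
    grid_lon = np.linspace(-179.5,179.5,360)
    field    = np.random.rand(180,360)
    site_lat = np.array([ 40.0, -3.5])
    site_lon = np.array([-105.2, 28.1])
    ilat = np.abs(site_lat[:,np.newaxis]-grid_lat).argmin(axis=1)
    ilon = np.abs(site_lon[:,np.newaxis]-grid_lon).argmin(axis=1)
    values = field[ilat,ilon]   # one value per site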
+ + Parameters + ---------- + var : ILAMB.Variable.Variable + The variable we wish to compare against this variable + + Returns + ------- + diff : ILAMB.Variable.Variable + A new variable object representing the difference + """ + def _make_bnds(x): + bnds = np.zeros(x.size+1) + bnds[1:-1] = 0.5*(x[1:]+x[:-1]) + bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) + bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) + return bnds + assert Unit(var.unit) == Unit(self.unit) + assert self.temporal == False + assert self.ndata == var.ndata + assert self.layered == False + # Perform a check on the spatial grid. If it is the exact same + # grid, there is no need to interpolate. + same_grid = False + try: + same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) + except: + pass + + if same_grid: + error = np.ma.masked_array(var.data-self.data,mask=self.data.mask+var.data.mask) + diff = Variable(data = error, + unit = var.unit, + lat = var.lat, + lat_bnds = var.lat_bnds, + lon = var.lon, + lon_bnds = var.lon_bnds, + ndata = var.ndata, + name = "%s_minus_%s" % (var.name,self.name)) + else: + if not self.spatial: raise il.NotSpatialVariable() + lat_bnd1 = _make_bnds(self.lat) + lon_bnd1 = _make_bnds(self.lon) + lat_bnd2 = _make_bnds( var.lat) + lon_bnd2 = _make_bnds( var.lon) + lat_bnd,lon_bnd,lat,lon,error = il.TrueError(lat_bnd1,lon_bnd1,self.lat,self.lon,self.data, + lat_bnd2,lon_bnd2, var.lat, var.lon, var.data) + diff = Variable(data = error, + unit = var.unit, + lat = lat, + lat_bnd = lat_bnd, + lon = lon, + lon_bnd = lon_bnd, + name = "%s_minus_%s" % (var.name,self.name)) + return diff + + def convert(self,unit,density=998.2): + """Convert the variable to a given unit. + + We use the UDUNITS library via the cf_units python interface to + convert the variable's unit. Additional support is provided + for unit conversions in which substance information is + required. For example, in quantities such as precipitation it + is common to have data in the form of a mass rate per unit + area [kg s-1 m-2] yet desire it in a linear rate [m s-1]. This + can be accomplished if the density of the substance is + known. We assume here that water is the substance, but this + can be changed by specifying the density when calling the + function. + + Parameters + ---------- + unit : str + the desired converted unit + density : float, optional + the mass density in [kg m-3] to use when converting linear + rates to area density rates + + Returns + ------- + self : ILAMB.Variable.Variable + this object with its unit converted + + """ + if unit is None: return self + src_unit = Unit(self.unit) + tar_unit = Unit( unit) + mask = self.data.mask + + # Define some generic quantities + linear = Unit("m") + linear_rate = Unit("m s-1") + area_density = Unit("kg m-2") + area_density_rate = Unit("kg m-2 s-1") + mass_density = Unit("kg m-3") + volume_conc = Unit("mol m-3") + mass_conc = Unit("mol kg-1") + + # UDUNITS doesn't handle frequently found temperature expressions + synonyms = {"K":"degK", + "R":"degR", + "C":"degC", + "F":"degF"} + for syn in synonyms.keys(): + if src_unit.format() == syn: src_unit = Unit(synonyms[syn]) + if tar_unit.format() == syn: tar_unit = Unit(synonyms[syn]) + + # Do we need to multiply by density? 
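        # (Illustrative aside: for water at 998.2 kg m-3 a mass flux of
        #  3.0e-5 kg m-2 s-1 is a linear rate of 3.0e-5/998.2 ~ 3.0e-8 m s-1,
        #  which Unit("m s-1").convert(3.0e-8, Unit("mm d-1")) puts at roughly
        #  2.6 mm per day, so multiplying or dividing by the density below is
        #  all that is needed to move between the two families of units.)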
+ if ( (src_unit.is_convertible(linear_rate) and tar_unit.is_convertible(area_density_rate)) or + (src_unit.is_convertible(linear ) and tar_unit.is_convertible(area_density )) or + (src_unit.is_convertible(mass_conc ) and tar_unit.is_convertible(volume_conc )) ): + np.seterr(over='ignore',under='ignore') + self.data *= density + np.seterr(over='raise',under='raise') + src_unit *= mass_density + + # Do we need to divide by density? + if ( (tar_unit.is_convertible(linear_rate) and src_unit.is_convertible(area_density_rate)) or + (tar_unit.is_convertible(linear ) and src_unit.is_convertible(area_density )) or + (tar_unit.is_convertible(mass_conc ) and src_unit.is_convertible(volume_conc )) ): + np.seterr(over='ignore',under='ignore') + self.data = self.data / density + np.seterr(over='raise',under='raise') + src_unit = src_unit / mass_density + + # Convert units + try: + self.data = src_unit.convert(self.data,tar_unit) + self.data = np.ma.masked_array(self.data,mask=mask) + self.unit = unit + except: + raise il.UnitConversionError() + return self + + def toNetCDF4(self,dataset,attributes=None,group=None): + """Adds the variable to the specified netCDF4 dataset. + + Parameters + ---------- + dataset : netCDF4.Dataset + a dataset into which you wish to save this variable + attributes : dict of scalars, optional + a dictionary of additional scalars to encode as ncattrs + group : str, optional + the name of the netCDF4 group to to which we add this variable + """ + def _checkTime(t,dset): + """A local function for ensuring the time dimension is saved in the dataset.""" + time_name = "time" + while True: + if time_name in dset.dimensions.keys(): + if (t.shape == dset.variables[time_name][...].shape and + np.allclose(t,dset.variables[time_name][...],atol=0.5*self.dt)): + return time_name + else: + time_name += "_" + else: + dset.createDimension(time_name) + T = dset.createVariable(time_name,"double",(time_name)) + T.setncattr("units","days since 1850-01-01 00:00:00") + T.setncattr("calendar","noleap") + T.setncattr("axis","T") + T.setncattr("long_name","time") + T.setncattr("standard_name","time") + T[...] = t + if self.time_bnds is not None: + bnd_name = time_name.replace("time","time_bnds") + T.setncattr("bounds",bnd_name) + if "nb" not in dset.dimensions.keys(): + D = dset.createDimension("nb",size=2) + if bnd_name not in dset.variables.keys(): + B = dset.createVariable(bnd_name,"double",(time_name,"nb")) + B.setncattr("units","days since 1850-01-01 00:00:00") + B[...] = self.time_bnds + return time_name + + def _checkLat(lat,dset): + """A local function for ensuring the lat dimension is saved in the dataset.""" + lat_name = "lat" + while True: + if lat_name in dset.dimensions.keys(): + if (lat.shape == dset.variables[lat_name][...].shape and + np.allclose(lat,dset.variables[lat_name][...])): + return lat_name + else: + lat_name += "_" + else: + dset.createDimension(lat_name,size=lat.size) + Y = dset.createVariable(lat_name,"double",(lat_name)) + Y.setncattr("units","degrees_north") + Y.setncattr("axis","Y") + Y.setncattr("long_name","latitude") + Y.setncattr("standard_name","latitude") + Y[...] = lat + if self.lat_bnds is not None: + bnd_name = lat_name.replace("lat","lat_bnds") + Y.setncattr("bounds",bnd_name) + if "nb" not in dset.dimensions.keys(): + D = dset.createDimension("nb",size=2) + if bnd_name not in dset.variables.keys(): + B = dset.createVariable(bnd_name,"double",(lat_name,"nb")) + B.setncattr("units","degrees_north") + B[...] 
= self.lat_bnds + return lat_name + + def _checkLon(lon,dset): + """A local function for ensuring the lon dimension is saved in the dataset.""" + lon_name = "lon" + while True: + if lon_name in dset.dimensions.keys(): + if (lon.shape == dset.variables[lon_name][...].shape and + np.allclose(lon,dset.variables[lon_name][...])): + return lon_name + else: + lon_name += "_" + else: + dset.createDimension(lon_name,size=lon.size) + X = dset.createVariable(lon_name,"double",(lon_name)) + X.setncattr("units","degrees_east") + X.setncattr("axis","X") + X.setncattr("long_name","longitude") + X.setncattr("standard_name","longitude") + X[...] = lon + if self.lon_bnds is not None: + bnd_name = lon_name.replace("lon","lon_bnds") + X.setncattr("bounds",bnd_name) + if "nb" not in dset.dimensions.keys(): + D = dset.createDimension("nb",size=2) + if bnd_name not in dset.variables.keys(): + B = dset.createVariable(bnd_name,"double",(lon_name,"nb")) + B.setncattr("units","degrees_east") + B[...] = self.lon_bnds + return lon_name + + def _checkData(ndata,dset): + """A local function for ensuring the data dimension is saved in the dataset.""" + data_name = "data" + while True: + if data_name in dset.dimensions.keys(): + if (ndata == len(dset.dimensions[data_name])): + return data_name + else: + data_name += "_" + else: + dset.createDimension(data_name,size=ndata) + return data_name + + def _checkLayer(layer,dataset): + """A local function for ensuring the layer dimension is saved in the dataset.""" + layer_name = "layer" + while True: + if layer_name in dataset.dimensions.keys(): + if (layer.shape == dataset.variables[layer_name][...].shape and + np.allclose(layer,dataset.variables[layer_name][...])): + return layer_name + else: + layer_name += "_" + else: + dataset.createDimension(layer_name,size=layer.size) + Z = dataset.createVariable(layer_name,"double",(layer_name)) + Z.setncattr("units","m") + Z.setncattr("axis","Z") + Z.setncattr("long_name","depth") + Z.setncattr("standard_name","depth") + Z[...] = layer + if self.depth_bnds is not None: + bnd_name = layer_name.replace("layer","layer_bnds") + Z.setncattr("bounds",bnd_name) + if "nb" not in dataset.dimensions.keys(): + D = dataset.createDimension("nb",size=2) + if bnd_name not in dataset.variables.keys(): + B = dataset.createVariable(bnd_name,"double",(layer_name,"nb")) + B.setncattr("units","m") + B[...] = self.depth_bnds + return layer_name + + # if not group is desired, just write to the dataset... 
+ if group is None: + dset = dataset + else: + # if a group is desired, check to see it exists and write into group + if not dataset.groups.has_key(group): + dset = dataset.createGroup(group) + else: + dset = dataset.groups[group] + + dim = [] + if self.temporal: dim.append(_checkTime (self.time ,dset)) + if self.layered: dim.append(_checkLayer(self.depth,dset)) + if self.ndata is not None: + dim.append(_checkData (self.ndata,dset)) + _checkLat(self.lat,dset) + _checkLon(self.lon,dset) + else: + if self.lat is not None: dim.append(_checkLat (self.lat ,dset)) + if self.lon is not None: dim.append(_checkLon (self.lon ,dset)) + + grp = dset + if self.data.size == 1: + if not dset.groups.has_key("scalars"): + grp = dset.createGroup("scalars") + else: + grp = dset.groups["scalars"] + + V = grp.createVariable(self.name,"double",dim,zlib=True) + V.setncattr("units",self.unit) + try: + V.setncattr("max",self.data.max()) + V.setncattr("min",self.data.min()) + except: + V.setncattr("max",0) + V.setncattr("min",1) + + if self.data.size == 1: + # we are dealing with a scalar + if np.ma.is_masked(self.data): self.data = 0 + else: + # not a scalar, find the middle 98 percent of the data + data = np.ma.copy(self.data).compressed().reshape((-1)) + if data.size == 0: + V.setncattr("up99",1) + V.setncattr("dn99",0) + else: + data.sort() + V.setncattr("up99",data[min(int(round(0.99*data.size)),data.size-1)]) + V.setncattr("dn99",data[ int(round(0.01*data.size))]) + + # optionally write out more attributes + if attributes: + for key in attributes.keys(): + V.setncattr(key,attributes[key]) + + if type(self.data) is np.ma.core.MaskedConstant: + V[...] = np.nan + else: + V[...] = self.data + + def plot(self,ax,**keywords): + """Plots the variable on the given matplotlib axis. + + The behavior of this routine depends on the type of variable + specified. If the data is purely temporal, then the plot will + be a scatter plot versus time of the data. If it is purely + spatial, then the plot will be a global plot of the data. The + routine supports multiple keywords although some may not apply + to the type of plot being generated. 
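The up99/dn99 attributes written above are simply the 1st and 99th percentiles of the unmasked data, presumably so that downstream plotting can pick robust color limits. A short sketch of the same computation (synthetic data):

    import numpy as np
    data = np.ma.masked_less(np.random.randn(1000),-3.0).compressed()
    data.sort()
    up99 = data[min(int(round(0.99*data.size)),data.size-1)]
    dn99 = data[int(round(0.01*data.size))]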
+ + Parameters + ---------- + ax : matplotlib.axes._subplots.AxesSubplot + The matplotlib axes object onto which you wish to plot the variable + lw : float, optional + The line width to use when plotting + alpha : float, optional + The degree of transparency when plotting, alpha \in [0,1] + color : str or RGB tuple, optional + The color to plot with in line plots + label : str, optional + The label to appear in the legend of line plots + vmin : float, optional + The minimum plotted value + vmax : float, optional + The maximum plotted value + region : str, optional + The region on which to display a spatial variable + cmap : str, optional + The name of the colormap to be used in plotting the spatial variable + ticks : array of floats, optional + Defines the locations of xtick + ticklabels : array of strings, optional + Defines the labels of the xticks + """ + lw = keywords.get("lw" ,1.0) + alpha = keywords.get("alpha" ,1.0) + color = keywords.get("color" ,"k") + label = keywords.get("label" ,None) + vmin = keywords.get("vmin" ,self.data.min()) + vmax = keywords.get("vmax" ,self.data.max()) + region = keywords.get("region","global") + cmap = keywords.get("cmap" ,"jet") + land = keywords.get("land" ,0.875) + water = keywords.get("water" ,0.750) + pad = keywords.get("pad" ,5.0) + cbar = keywords.get("cbar" ,False) + + rem_mask = None + r = Regions() + if self.temporal and not self.spatial: + + ticks = keywords.get("ticks",None) + ticklabels = keywords.get("ticklabels",None) + t = self.time/365.+1850 + ax.plot(t,self.data,'-', + color = color, + lw = lw, + alpha = alpha, + label = label) + if ticks is not None: ax.set_xticks(ticks) + if ticklabels is not None: ax.set_xticklabels(ticklabels) + ax.grid('on') + ax.set_ylim(vmin,vmax) + + elif not self.temporal: + + # Mask out areas outside our region + rem_mask = np.copy(self.data.mask) + self.data.mask += r.getMask(region,self) + + # Find the figure geometry + if self.ndata: + LAT = np.ma.masked_array(self.lat,mask=self.data.mask,copy=True) + LON = np.ma.masked_array(self.lon,mask=self.data.mask,copy=True) + dateline = False + else: + LAT,LON = np.meshgrid(self.lat,self.lon,indexing='ij') + LAT = np.ma.masked_array(LAT,mask=self.data.mask,copy=False) + LON = np.ma.masked_array(LON,mask=self.data.mask,copy=False) + + lat0 = LAT.min() ; latf = LAT.max() + lon0 = LON.min() ; lonf = LON.max() + latm = LAT.mean(); lonm = LON.mean() + area = (latf-lat0)*(lonf-lon0) + + # Setup the plot projection depending on data limits + bmap = Basemap(projection = 'robin', + lon_0 = 0, + ax = ax, + resolution = 'c') + if (lon0 < -170.) 
and (lonf > 170.): + if lat0 > 23.5: + bmap = Basemap(projection = 'npstere', + boundinglat = lat0-5., + lon_0 = 0., + ax = ax, + resolution = 'c') + elif latf < -23.5: + bmap = Basemap(projection = 'spstere', + boundinglat = latf+5., + lon_0 = 180., + ax = ax, + resolution = 'c') + else: + if area < 10000.: + bmap = Basemap(projection = 'cyl', + llcrnrlon = lon0-2*pad, + llcrnrlat = lat0- pad, + urcrnrlon = lonf+2*pad, + urcrnrlat = latf+ pad, + ax = ax, + resolution = 'c') + try: + bmap.drawlsmask(land_color = str(land), + ocean_color = str(water), + lakes = True) + except: + bmap.drawcoastlines(linewidth = 0.2, + color = "darkslategrey") + + if self.spatial: + LAT,LON = np.meshgrid(self.lat,self.lon,indexing='ij') + ax = bmap.pcolormesh(LON,LAT,self.data, + latlon=True,vmin=vmin,vmax=vmax,cmap=cmap) + elif self.ndata is not None: + x,y = bmap(self.lon[self.data.mask==False], + self.lat[self.data.mask==False]) + data = self.data[self.data.mask==False] + norm = colors.Normalize(vmin,vmax) + norm = norm(data) + clmp = get_cmap(cmap) + clrs = clmp(norm) + size = 35 + ax = bmap.scatter(x,y,s=size,color=clrs,ax=ax,linewidths=0,cmap=cmap) + if cbar: + cb = bmap.colorbar(ax,location='bottom',pad="5%") + if label is not None: cb.set_label(label) + if rem_mask is not None: self.data.mask = rem_mask + return ax + + + def interpolate(self,time=None,lat=None,lon=None,lat_bnds=None,lon_bnds=None,itype='nearestneighbor'): + """Use nearest-neighbor interpolation to interpolate time and/or space at given values. + + Parameters + ---------- + time : numpy.ndarray, optional + Array of times at which to interpolate the variable + lat : numpy.ndarray, optional + Array of latitudes at which to interpolate the variable + lon : numpy.ndarray, optional + Array of longitudes at which to interpolate the variable + + Returns + ------- + var : ILAMB.Variable.Variable + The interpolated variable + """ + if time is None and lat is None and lon is None: return self + output_time = self.time if (time is None) else time + output_tbnd = self.time_bnds if (time is None) else None + output_lat = self.lat if (lat is None) else lat + output_lon = self.lon if (lon is None) else lon + output_area = self.area if (lat is None and lon is None) else None + + data = self.data + if self.spatial and (lat is not None or lon is not None): + if lat is None: lat = self.lat + if lon is None: lon = self.lon + if itype == 'nearestneighbor': + rows = (np.abs(lat[:,np.newaxis]-self.lat)).argmin(axis=1) + cols = (np.abs(lon[:,np.newaxis]-self.lon)).argmin(axis=1) + args = [] + if self.temporal: args.append(range(self.time.size)) + if self.layered: args.append(range(self.depth.size)) + args.append(rows) + args.append(cols) + ind = np.ix_(*args) + mask = data.mask[ind] + data = data.data[ind] + data = np.ma.masked_array(data,mask=mask) + frac = self.area / il.CellAreas(self.lat,self.lon,self.lat_bnds,self.lon_bnds).clip(1e-12) + frac = frac.clip(0,1) + frac = frac[np.ix_(rows,cols)] + output_area = frac * il.CellAreas(lat,lon,lat_bnds,lon_bnds) + elif itype == 'bilinear': + from scipy.interpolate import RectBivariateSpline + if self.data.ndim == 3: + halo = il.LandLinInterMissingValues(self.data) + data = np.ma.zeros((self.data.shape[:-2]+(lat.size,lon.size))) + for i in range(self.data.shape[0]): + dint = RectBivariateSpline(self.lat,self.lon, halo[i,...], kx=1,ky=1) + mint = RectBivariateSpline(self.lat,self.lon,self.data[i,...].mask,kx=1,ky=1) + data[i,...] 
= np.ma.masked_array(dint(lat,lon,grid=True), + mint(lat,lon,grid=True)>0.5) + frac = self.area / il.CellAreas(self.lat,self.lon).clip(1e-12) + frac = frac.clip(0,1) + frac = RectBivariateSpline(self.lat,self.lon,frac,kx=1,ky=1) + output_area = frac(lat,lon,grid=True) * il.CellAreas(lat,lon) + else: + raise ValueError("Uknown interpolation type: %s" % itype) + if self.temporal and time is not None: + times = np.apply_along_axis(np.argmin,1,np.abs(time[:,np.newaxis]-self.time)) + mask = data.mask + if mask.size > 1: mask = data.mask[times,...] + data = data.data[times,...] + data = np.ma.masked_array(data,mask=mask) + output_tbnd = self.time_bnds[times] + return Variable(data = data, unit = self.unit, name = self.name, ndata = self.ndata, + lat = output_lat, + lon = output_lon, + area = output_area, + time = output_time, + time_bnds = output_tbnd) + + def phaseShift(self,var,method="max_of_annual_cycle"): + """Computes the phase shift between a variable and this variable. + + Finds the phase shift as the time between extrema of the + annual cycles of the variables. Note that if this var and/or + the given variable are not already annual cycles, they will be + computed but not returned. + + Parameters + ---------- + var : ILAMB.Variable.Variable + The variable with which we will measure phase shift + method : str, optional + The name of the method used to compute the phase shift + + """ + assert method in ["max_of_annual_cycle","min_of_annual_cycle"] + assert self.temporal == var.temporal + v1 = self; v2 = var + if not self.temporal: + # If the data is not temporal, then the user may have + # already found the extrema. If the units of the input + # variable are days, then set the extrema to this data. + if not (self.unit == "d" and var.unit == "d"): raise il.NotTemporalVariable + e1 = v1 + e2 = v2 + else: + # While temporal, the user may have passed in the mean + # annual cycle as the variable. So if the leading + # dimension is 12 we assume the variables are already the + # annual cycles. If not, we compute the cycles and then + # compute the extrema. + if self.time.size != 12: v1 = self.annualCycle() + if var.time.size != 12: v2 = var .annualCycle() + e1 = v1.timeOfExtrema(etype=method[:3]) + e2 = v2.timeOfExtrema(etype=method[:3]) + if e1.spatial: + shift = e1.spatialDifference(e2) + else: + data = e2.data - e1.data + mask = e1.data.mask + e2.data.mask + shift = Variable(data=data,unit=e1.unit,ndata=e1.ndata,lat=e1.lat,lon=e1.lon) + shift.name = "phase_shift_of_%s" % e1.name + shift.data += (shift.data < -0.5*365.)*365. + shift.data -= (shift.data > +0.5*365.)*365. + return shift + + def correlation(self,var,ctype,region=None): + """Computes the correlation between two variables. + + Parameters + ---------- + var : ILAMB.Variable.Variable + The variable with which we will compute a correlation + ctype : str + The correlation type, one of {"spatial","temporal","spatiotemporal"} + region : str, optional + The region over which to perform a spatial correlation + + Notes + ----- + Need to better think about what correlation means when data + are masked. The sums ignore the data but then the number of + items *n* is not constant and should be reduced for masked + values. 
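The helper _correlation that follows computes the familiar Pearson coefficient from running sums, r = (sum(xy) - n*xbar*ybar)/(sqrt(sum(x^2) - n*xbar^2)*sqrt(sum(y^2) - n*ybar^2)). A quick numpy check against np.corrcoef (synthetic data):

    import numpy as np
    x = np.random.rand(100)
    y = 2.0*x + 0.1*np.random.randn(100)
    n = x.size
    r = (((x*y).sum()-n*x.mean()*y.mean())/
         (np.sqrt((x*x).sum()-n*x.mean()**2)*np.sqrt((y*y).sum()-n*y.mean()**2)))
    # r agrees with np.corrcoef(x,y)[0,1] to within roundoff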
+ + """ + def _correlation(x,y,axes=None): + if axes is None: axes = range(x.ndim) + if type(axes) == int: axes = (int(axes),) + axes = tuple(axes) + n = 1 + for ax in axes: n *= x.shape[ax] + xbar = x.sum(axis=axes)/n # because np.mean() doesn't take axes which are tuples + ybar = y.sum(axis=axes)/n + xy = (x*y).sum(axis=axes) + x2 = (x*x).sum(axis=axes) + y2 = (y*y).sum(axis=axes) + try: + r = (xy-n*xbar*ybar)/(np.sqrt(x2-n*xbar*xbar)*np.sqrt(y2-n*ybar*ybar)) + except: + r = np.nan + return r + + # checks on data consistency + assert region is None + assert self.data.shape == var.data.shape + assert ctype in ["spatial","temporal","spatiotemporal"] + + # determine arguments for functions + axes = None + out_time = None + out_lat = None + out_lon = None + out_area = None + out_ndata = None + if ctype == "temporal": + axes = 0 + if self.spatial: + out_lat = self.lat + out_lon = self.lon + out_area = self.area + elif self.ndata: + out_ndata = self.ndata + elif ctype == "spatial": + if self.spatial: axes = range(self.data.ndim)[-2:] + if self.ndata: axes = self.data.ndim-1 + if self.temporal: out_time = self.time + out_time_bnds = None + if out_time is not None: out_time_bnds = self.time_bnds + r = _correlation(self.data,var.data,axes=axes) + return Variable(data=r,unit="1", + name="%s_correlation_of_%s" % (ctype,self.name), + time=out_time,time_bnds=out_time_bnds,ndata=out_ndata, + lat=out_lat,lon=out_lon,area=out_area) + + def bias(self,var): + """Computes the bias between a given variable and this variable. + + Parameters + ---------- + var : ILAMB.Variable.Variable + The variable with which we will measure bias + + Returns + ------- + bias : ILAMB.Variable.Variable + the bias + """ + # If not a temporal variable, then we assume that the user is + # passing in mean data and return the difference. + lat,lon,area = self.lat,self.lon,self.area + if not self.temporal: + assert self.temporal == var.temporal + bias = self.spatialDifference(var) + bias.name = "bias_of_%s" % self.name + return bias + if self.spatial: + # If the data is spatial, then we interpolate it on a + # common grid and take the difference. + + same_grid = False + try: + same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) + except: + pass + if not same_grid: + lat,lon = il.ComposeSpatialGrids(self,var) + area = None + self_int = self.interpolate(lat=lat,lon=lon) + var_int = var .interpolate(lat=lat,lon=lon) + data = var_int.data-self_int.data + mask = var_int.data.mask+self_int.data.mask + else: + data = var.data -self.data + mask = var.data.mask+self.data.mask + + elif (self.ndata or self.time.size == self.data.size): + # If the data are at sites, then take the difference + data = var.data.data-self.data.data + mask = var.data.mask+self.data.mask + else: + raise il.NotSpatialVariable("Cannot take bias of scalars") + # Finally we return the temporal mean of the difference + bias = Variable(data=np.ma.masked_array(data,mask=mask), + name="bias_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds, + unit=self.unit,ndata=self.ndata, + lat=lat,lon=lon,area=area, + depth_bnds = self.depth_bnds).integrateInTime(mean=True) + bias.name = bias.name.replace("_integrated_over_time_and_divided_by_time_period","") + return bias + + def rmse(self,var): + """Computes the RMSE between a given variable and this variable. 
+ + Parameters + ---------- + var : ILAMB.Variable.Variable + The variable with which we will measure RMSE + + Returns + ------- + RMSE : ILAMB.Variable.Variable + the RMSE + + """ + # If not a temporal variable, then we assume that the user is + # passing in mean data and return the difference. + lat,lon,area = self.lat,self.lon,self.area + if not self.temporal: + assert self.temporal == var.temporal + rmse = self.spatialDifference(var) + rmse.name = "rmse_of_%s" % self.name + return rmse + if self.spatial: + # If the data is spatial, then we interpolate it on a + # common grid and take the difference. + same_grid = False + try: + same_grid = np.allclose(self.lat,var.lat)*np.allclose(self.lon,var.lon) + except: + pass + if not same_grid: + lat,lon = il.ComposeSpatialGrids(self,var) + area = None + self_int = self.interpolate(lat=lat,lon=lon) + var_int = var .interpolate(lat=lat,lon=lon) + data = var_int.data-self_int.data + mask = var_int.data.mask+self_int.data.mask + else: + data = var.data -self.data + mask = var.data.mask+self.data.mask + elif (self.ndata or self.time.size == self.data.size): + # If the data are at sites, then take the difference + data = var.data.data-self.data.data + mask = var.data.mask+self.data.mask + else: + raise il.NotSpatialVariable("Cannot take rmse of scalars") + # Finally we return the temporal mean of the difference squared + np.seterr(over='ignore',under='ignore') + data *= data + np.seterr(over='raise',under='raise') + rmse = Variable(data=np.ma.masked_array(data,mask=mask), + name="rmse_of_%s" % self.name,time=self.time,time_bnds=self.time_bnds, + unit=self.unit,ndata=self.ndata, + lat=lat,lon=lon,area=area, + depth_bnds = self.depth_bnds).integrateInTime(mean=True) + rmse.name = rmse.name.replace("_integrated_over_time_and_divided_by_time_period","") + rmse.data = np.sqrt(rmse.data) + return rmse + + def rms(self): + """Computes the RMS of this variable. + + Returns + ------- + RMS : ILAMB.Variable.Variable + the RMS + + """ + if not self.temporal: raise il.NotTemporalVariable() + unit = self.unit + np.seterr(over='ignore',under='ignore') + data = self.data**2 + np.seterr(over='raise',under='raise') + rms = Variable(data = data, + unit = "1", # will change later + name = "tmp", # will change later + ndata = self.ndata, + lat = self.lat, + lon = self.lon, + area = self.area, + time = self.time).integrateInTime(mean=True) + np.seterr(over='ignore',under='ignore') + rms.data = np.sqrt(rms.data) + np.seterr(over='raise',under='raise') + rms.unit = unit + rms.name = "rms_of_%s" % self.name + return rms + + def interannualVariability(self): + """Computes the interannual variability. + + The internannual variability in this case is defined as the + standard deviation of the data in the temporal dimension. + + Returns + ------- + iav : ILAMB.Variable.Variable + the interannual variability variable + """ + if not self.temporal: raise il.NotTemporalVariable + np.seterr(over='ignore',under='ignore') + data = self.data.std(axis=0) + np.seterr(over='raise',under='raise') + return Variable(data=data, + name="iav_of_%s" % self.name, + unit=self.unit,ndata=self.ndata, + lat=self.lat,lon=self.lon,area=self.area, + depth_bnds = self.depth_bnds) + + def spatialDistribution(self,var,region="global"): + r"""Evaluates how well the input variable is spatially distributed relative to this variable. + + This routine returns the normalized standard deviation and + correlation (needed for a Taylor plot) as well as a score + given as + + .. 
math:: \frac{4(1+R)}{((\sigma+\frac{1}{\sigma})^2 (1+R_0))} + + where :math:`R` is the correlation, :math:`R_0=1` is the + reference correlation, and :math:`\sigma` is the normalized + standard deviation. + + Parameters + ---------- + var : ILAMB.Variable.Variable + the comparison variable + region : str, optional + the name of the region over which to check the spatial distribution + + Returns + ------- + std : ILAMB.Variable.Variable + the normalized standard deviation of the input variable + R : ILAMB.Variable.Variable + the correlation of the input variable + score : ILAMB.Variable.Variable + the spatial distribution score + + """ + assert self.temporal == var.temporal == False + + r = Regions() + + # First compute the observational spatial/site standard deviation + rem_mask0 = np.copy(self.data.mask) + self.data.mask += r.getMask(region,self) + + np.seterr(over='ignore',under='ignore') + std0 = self.data.std() + np.seterr(over='raise',under='raise') + + # Next compute the model spatial/site standard deviation + rem_mask = np.copy(var.data.mask) + var.data.mask += r.getMask(region,var) + + np.seterr(over='ignore',under='ignore') + std = var.data.std() + np.seterr(over='raise',under='raise') + + # Interpolate to new grid for correlation + if self.spatial: + lat,lon = il.ComposeSpatialGrids(self,var) + self_int = self.interpolate(lat=lat,lon=lon) + var_int = var .interpolate(lat=lat,lon=lon) + else: + self_int = self + var_int = var + R = self_int.correlation(var_int,ctype="spatial") # add regions + if type(R.data) is np.ma.core.MaskedConstant: R.data = 0. + + # Restore masks + self.data.mask = rem_mask0 + var.data.mask = rem_mask + + # Put together scores, we clip the standard deviation of both + # variables at the same small amount, meant to avoid division + # by zero errors. + try: + R0 = 1.0 + std0 = std0.clip(1e-12) + std = std .clip(1e-12) + std = std/std0 + score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) + except: + std = np.asarray([0.0]) + score = np.asarray([0.0]) + std = Variable(data=std ,name="normalized_spatial_std_of_%s_over_%s" % (self.name,region),unit="1") + score = Variable(data=score,name="spatial_distribution_score_of_%s_over_%s" % (self.name,region),unit="1") + return std,R,score + + def coarsenInTime(self,intervals,window=0.): + """Compute the mean function value in each of the input intervals. + + Parameters + ---------- + intervals : array of shape (n,2) + An array of n intervals where the first entry is the + beginning and the second entry is the end of the interval + window : float, optional + Extend each interval before and after by this amount of time + + Returns + ------- + coarse : ILAMB.Variable.Variable + The coarsened variable + """ + if not self.temporal: raise il.NotTemporalVariable + assert intervals.ndim == 2 + n = intervals.shape[0] + shp = (n,)+self.data.shape[1:] + time = np.zeros(n) + data = np.ma.zeros(shp) + for i in range(n): + t0 = intervals[i,0]-window + tf = intervals[i,1]+window + time[i] = 0.5*(t0+tf) + mean = self.integrateInTime(mean=True,t0=t0,tf=tf).convert(self.unit) + data[i,...] = mean.data + return Variable(name = "coarsened_%s" % self.name, + unit = self.unit, + time = time, + time_bnds = intervals, + data = data, + ndata = self.ndata, + lat = self.lat, + lon = self.lon, + area = self.area, + depth_bnds = self.depth_bnds) + + def accumulateInTime(self): + r"""For each time interval, accumulate variable from the beginning. 
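The spatialDistribution score above is the Taylor skill measure: it equals 1 when the normalized standard deviation is 1 and the correlation matches the reference R0 = 1, and it decays as either degrades. A two-line check with illustrative values:

    R, R0, sigma = 0.8, 1.0, 1.2
    score = 4.0*(1.0+R)/((sigma+1.0/sigma)**2*(1.0+R0))   # ~0.87 for these values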
+ + For each time interval :math:`i` in the variable, defined by + :math:`[t_0^i,t_f^i]`, compute + + .. math:: \int_{t_0^0}^{t_f^i} v(t,\dots)\ dt + + This routine is useful, for example, if the variable is a mass + rate defined over time and we wish to know the mass + accumulation as a function of time. + + Returns + ------- + sum : ILAMB.Variable.Variable + The cumulative sum of this variable + + """ + if not self.temporal: raise il.NotTemporalVariable + n = self.time.size + shp = (n+1,) + self.data.shape[1:] + time = np.zeros(n+1) + data = np.ma.zeros(shp) + time[0] = self.time_bnds[0,0] + for i in range(n): + t0 = self.time_bnds[i,0] + tf = self.time_bnds[i,1] + isum = self.integrateInTime(t0=t0,tf=tf) + time[i+1] = tf + data[i+1,...] = data[i,...] + isum.data + + return Variable(name = "cumulative_sum_%s" % self.name, + unit = isum.unit, + time = time, + data = data, + lat = self.lat, + lon = self.lon, + area = self.area) + + + def trim(self,lat=None,lon=None,t=None,d=None): + """Trim away a variable in space/time in place. + + Parameters + ---------- + lat,lon,t,d : tuple or list + a 2-tuple containing the lower and upper limits beyond which we trim + """ + def _whichInterval(val,bnds): + ind = np.where((val>=bnds[:,0])*(val<=bnds[:,1]))[0] + assert ind.size <= 2 + ind = ind[0] + return ind + + if lat is not None: + assert len(lat) == 2 + if not self.spatial: raise il.NotSpatialVariable + i = _whichInterval(lat[0],self.lat_bnds) + j = _whichInterval(lat[1],self.lat_bnds)+1 + self.lat = self.lat [i:j] + self.lat_bnds = self.lat_bnds[i:j] + self.data = self.data[...,i:j,:] + self.area = self.area[ i:j,:] + if lon is not None: + assert len(lon) == 2 + if not self.spatial: raise il.NotSpatialVariable + i = _whichInterval(lon[0],self.lon_bnds) + j = _whichInterval(lon[1],self.lon_bnds)+1 + self.lon = self.lon [i:j] + self.lon_bnds = self.lon_bnds[i:j] + self.data = self.data[...,i:j] + self.area = self.area[ :,i:j] + if t is not None: + assert len(t) == 2 + if not self.temporal: raise il.NotTemporalVariable + self = il.ClipTime(self,t[0],t[1]) + if d is not None: + assert len(d) == 2 + if self.depth_bnds is None: raise ValueError + keep = (self.depth_bnds[:,1] >= d[0])*(self.depth_bnds[:,0] <= d[1]) + ind = np.where(keep)[0] + self.depth_bnds = self.depth_bnds[ind,:] + self.depth = self.depth [ind ] + self.data = self.data[...,ind,:,:] + + return self diff --git a/src/ILAMB/__init__.py b/src/ILAMB/__init__.py new file mode 100644 index 00000000..3bb5124a --- /dev/null +++ b/src/ILAMB/__init__.py @@ -0,0 +1,35 @@ +__author__ = 'Nathan Collier' +__date__ = 'Jun 2018' +__version__ = '2.3' + +from distutils.version import LooseVersion +import platform + +# These are guesses at actual requirements +requires = { + "numpy" : "1.9.2", + "matplotlib" : "1.4.3", + "netCDF4" : "1.1.4", + "cf_units" : "2.0.0", + "mpl_toolkits.basemap" : "1.0.7", + "sympy" : "0.7.6", + "mpi4py" : "1.3.1" +} + +froms = { + "mpl_toolkits.basemap" : "Basemap" +} + +for key in requires.keys(): + if "." 
in key: + pkg = __import__(key, globals(), locals(), [froms[key]]) + else: + pkg = __import__(key) + if LooseVersion(pkg.__version__) < LooseVersion(requires[key]): + raise ImportError( + "Bad %s version: ILAMB %s requires %s >= %s got %s" % + (key,__version__,key,requires[key],pkg.__version__)) + + + + diff --git a/src/ILAMB/constants.py b/src/ILAMB/constants.py new file mode 100644 index 00000000..8d77b8f6 --- /dev/null +++ b/src/ILAMB/constants.py @@ -0,0 +1,224 @@ +from numpy import asarray,ones,copy as npcopy +from matplotlib.colors import from_levels_and_colors +from Regions import Regions + +__all__ = ['spm','mph','hpd','mpy','dpy_noleap','dpy_gregorian','dpy_360','dpm_noleap','dpm_gregorian','dpm_360','g_per_Pg','g_per_kg','Ar_molar_mass','C_molar_mass','N_molar_mass','O_molar_mass','CO2_molar_mass','dry_air_molar_mass','dry_air_mass','dry_air_moles','co2_g_per_ppm','co2_ppm_per_kg','co2_ppm_per_C_Pg','regions','NCARclrs','NCARcmap','NCARnorm','region_names','dpy','mid_months','spd','spy'] + +# Time constants +spm = 60. # seconds per minute +mph = 60. # minutes per hour +hpd = 24. # hours per day +spd = spm*mph*hpd +spy = spd*365. +mpy = 12. # months per year +dpy_noleap = 365.0 # days per year (for no leap year calendars) +dpy_gregorian = 365.25 # days per year +dpy_360 = 360.0 # days per year (for 30 days/month) +dpm_noleap = asarray([31,28,31,30,31,30,31,31,30,31,30,31],dtype='float') # days per month +dpm_gregorian = npcopy(dpm_noleap) ; dpm_gregorian[1] = dpm_gregorian[1] + 0.25 +dpm_360 = ones(int(mpy))*30. +mid_months = asarray([15.5,45.,74.5,105.,135.5,166.,196.5,227.5,258.,288.5,319.,349.5],dtype='float') +lbl_months = ["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"] +bnd_months = asarray([0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365],dtype='int') + +dpy = {"noleap" : dpy_noleap, + "365_day" : dpy_noleap, + "360_day" : dpy_360, + "gregorian" : dpy_gregorian, + "proleptic_gregorian" : dpy_gregorian} + +# Mass unit conversions +g_per_Pg = 1e+15 # grams per Pg +g_per_kg = 1e+3 # grams per kg + +# Chemical constants +Ar_molar_mass = 39.948 # grams per mole +C_molar_mass = 12.0107 # grams per mole +N_molar_mass = 14.0067 # grams per mole +O_molar_mass = 15.9994 # grams per mole +CO2_molar_mass = C_molar_mass + 2. * O_molar_mass # grams per mole + +# Atmospheric constants +dry_air_molar_mass = 0.78084*2.*N_molar_mass + 0.20946*2.*O_molar_mass + 0.00934*Ar_molar_mass + 0.00039445*CO2_molar_mass # grams per mole +dry_air_mass = 5.1352e+21 # grams +dry_air_moles = dry_air_mass / dry_air_molar_mass +co2_g_per_ppm = dry_air_moles * CO2_molar_mass / 1.e+6 +co2_ppm_per_kg = g_per_kg / co2_g_per_ppm +co2_ppm_per_C_Pg = g_per_Pg / co2_g_per_ppm * CO2_molar_mass/C_molar_mass + +# Earth constants +earth_rad = 6.371e6 # meters + + +NCARclrs = asarray([[93,0,135], + [196,0,43], + [255,35,0], + [255,140,0], + [255,207,0], + [248,255,0], + [97,210,0], + [0,197,56], + [0,242,211], + [0,144,255], + [0,0,255]],dtype=float)/255. 
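# Back-of-the-envelope check of the CO2 conversion factors above (rounded values,
# illustrative only; names chosen so as not to clash with the module constants):
_moles_air  = 5.1352e21/28.97               # ~1.77e20 mol of dry air
_g_per_ppm  = _moles_air*44.01/1e6          # ~7.8e15 g CO2 per ppm
_ppm_per_Pg = 1e15/_g_per_ppm*44.01/12.01   # ~0.47 ppm per Pg C, i.e. ~2.1 Pg C per ppm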
+ +# Spatial plots and their default options +space_opts = {} +space_opts["timeint"] = { "name" :"Temporally integrated period mean", + "cmap" :"choose", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit", + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_timeint.png", + "sidelbl" :"MODEL MEAN", + "haslegend" :True } + +space_opts["timeintremap"] = { "name" :"Temporally integrated remapped period mean", + "cmap" :"choose", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit", + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_timeintremap.png", + "sidelbl" :"MAPPED MODEL MEAN", + "haslegend" :True } + +space_opts["bias"] = { "name" :"Temporally integrated period mean bias", + "cmap" :"seismic", + "sym" :True, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_bias.png", + "sidelbl" :"BIAS", + "haslegend" :True } + +space_opts["biasscore"] = { "name" :"Temporally integrated period mean bias score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_biasscore.png", + "sidelbl" :"BIAS SCORE", + "haslegend" :True } + +space_opts["rmse"] = { "name" :"Temporally integrated period mean rmse", + "cmap" :"YlOrRd", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_rmse.png", + "sidelbl" :"RMSE", + "haslegend" :True } + +space_opts["rmsescore"] = { "name" :"Temporally integrated period mean rmse score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_rmsescore.png", + "sidelbl" :"RMSE SCORE", + "haslegend" :True } + +space_opts["iav"] = { "name" :"Interannual variability", + "cmap" :"Reds", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iav.png", + "sidelbl" :"MODEL INTERANNUAL VARIABILITY", + "haslegend" :True } + +space_opts["iavscore"] = { "name" :"Interannual variability score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iavscore.png", + "sidelbl" :"INTERANNUAL VARIABILITY SCORE", + "haslegend" :True } + +space_opts["shift"] = { "name" :"Temporally integrated mean phase shift", + "cmap" :"PRGn", + "sym" :True, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_shift.png", + "sidelbl" :"DIFFERENCE IN MAX MONTH", + "haslegend" :True } + +space_opts["shiftscore"] = { "name" :"Temporally integrated mean phase shift score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_shiftscore.png", + "sidelbl" :"SEASONAL CYCLE SCORE", + "haslegend" :True } + +space_opts["phase"] = { "name" :"Temporally integrated period mean max month", + "cmap" :"jet", + "sym" :False, + "ticks" :mid_months, + "ticklabels":lbl_months, + "label" :"month", + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_phase.png", + "sidelbl" :"MODEL MAX MONTH", + "haslegend" :True } + + 
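# The "pattern" entries above are file-name templates; the plotting code presumably
# substitutes the model and region names into them, along the lines of (made-up names):
#     space_opts["bias"]["pattern"].replace("MNAME","CLM5").replace("RNAME","global")
#     -> "CLM5_global_bias.png"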
+time_opts = {} +time_opts["spaceint"] = { "name" : "Spatially integrated regional mean", + "section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_spaceint.png", + "sidelbl" : "REGIONAL MEAN", + "ticks" : None, + "ticklabels" : None, + "ylabel" : "unit"} + +time_opts["accumulate"] = { "name" : "Accumulated mean", + "section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_accumulate.png", + "sidelbl" : "ACCUMULATION", + "ticks" : None, + "ticklabels" : None, + "ylabel" : "unit"} + +time_opts["cycle"] = { "name" : "Spatially integrated regional mean cycle", + "section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_cycle.png", + "sidelbl" : "ANNUAL CYCLE", + "ticks" : mid_months/365.+1850., + "ticklabels" : lbl_months, + "ylabel" : "unit"} + +time_opts["dtcycle"] = { "name" : "Spatially integrated regional mean detrended cycle", + "section" : "Spatially integrated regional mean", + "haslegend" : False, + "pattern" : "MNAME_RNAME_dtcycle.png", + "sidelbl" : "MONTHLY ANOMALY", + "ticks" : mid_months/365.+1850., + "ticklabels" : lbl_months, + "ylabel" : "unit"} diff --git a/src/ILAMB/ilamblib.py b/src/ILAMB/ilamblib.py new file mode 100644 index 00000000..b7cff096 --- /dev/null +++ b/src/ILAMB/ilamblib.py @@ -0,0 +1,1794 @@ +from scipy.interpolate import NearestNDInterpolator +from constants import dpy,mid_months,bnd_months +from Regions import Regions +from netCDF4 import Dataset,num2date,date2num +from datetime import datetime +from cf_units import Unit +from copy import deepcopy +from mpi4py import MPI +import numpy as np +import logging,re + +logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) + +class VarNotInFile(Exception): + def __str__(self): return "VarNotInFile" + +class VarNotMonthly(Exception): + def __str__(self): return "VarNotMonthly" + +class VarNotInModel(Exception): + def __str__(self): return "VarNotInModel" + +class VarsNotComparable(Exception): + def __str__(self): return "VarNotComparable" + +class VarNotOnTimeScale(Exception): + def __str__(self): return "VarNotOnTimeScale" + +class UnknownUnit(Exception): + def __str__(self): return "UnknownUnit" + +class AreasNotInModel(Exception): + def __str__(self): return "AreasNotInModel" + +class MisplacedData(Exception): + def __str__(self): return "MisplacedData" + +class NotTemporalVariable(Exception): + def __str__(self): return "NotTemporalVariable" + +class NotSpatialVariable(Exception): + def __str__(self): return "NotSpatialVariable" + +class UnitConversionError(Exception): + def __str__(self): return "UnitConversionError" + +class AnalysisError(Exception): + def __str__(self): return "AnalysisError" + +class NotLayeredVariable(Exception): + def __str__(self): return "NotLayeredVariable" + +class NotDatasiteVariable(Exception): + def __str__(self): return "NotDatasiteVariable" + +def FixDumbUnits(unit): + r"""Try to fix the dumb units people insist on using. 
+ + Parameters + ---------- + unit : str + the trial unit + + Returns + ------- + unit : str + the fixed unit + """ + # Various synonyms for 1 + if unit.lower().strip() in ["unitless", + "n/a", + "none"]: unit = "1" + # Remove the C which so often is used to mean carbon but actually means coulomb + tokens = re.findall(r"[\w']+", unit) + for token in tokens: + if token.endswith("C") and Unit(token[:-1]).is_convertible(Unit("g")): + unit = unit.replace(token,token[:-1]) + return unit + +def GenerateDistinctColors(N,saturation=0.67,value=0.67): + r"""Generates a series of distinct colors. + + Computes N distinct colors using HSV color space, holding the + saturation and value levels constant and linearly vary the + hue. Colors are returned as a RGB tuple. + + Parameters + ---------- + N : int + number of distinct colors to generate + saturation : float, optional + argument of HSV color space + value : float, optional + argument of HSV color space + + Returns + ------- + RGB_tuples : list + list of N distinct RGB tuples + """ + from colorsys import hsv_to_rgb + HSV_tuples = [(x/float(N), saturation, value) for x in range(N)] + RGB_tuples = map(lambda x: hsv_to_rgb(*x), HSV_tuples) + return RGB_tuples + +def ConvertCalendar(t,tbnd=None): + r"""Converts calendar representations to a single standard. + + This routine converts the representation of time to the ILAMB + default: days since 1850-1-1 00:00:00 on a 365-day calendar. This + is so we can make comparisons with data from other models and + benchmarks. + + Parameters + ---------- + t : netCDF4 variable + the netCDF4 variable which represents time + tbnd : netCDF4 variable, optional + the netCDF4 variable which represents the bounds of time + + Returns + ------- + ta : numpy.ndarray + a numpy array of the converted times + tabnd : numpy.ndarray, optional + a numpy array of the converted boundary times + + """ + # If not calendar is given, we will assume it is 365_day + unit = t.units + if "calendar" in t.ncattrs(): + calendar = t.calendar.lower() + else: + calendar = "365_day" + + # If bounds are given, we will use those instead and later compute + # the time as the midpoint of the bounds. + if tbnd is None: + ta = t + else: + ta = tbnd + + # The datum might be different, use netCDF functions to shift it + ta = num2date(ta[...],unit ,calendar=calendar) + ta = date2num(ta ,"days since 1850-1-1",calendar=calendar) + + # Differences in calendars need to be handled differently + # depending on the intended temporal resolution. Here we introduce + # special code for different cases. + if tbnd is None: + if t[...].size == 1: + dt = 0 + else: + dt = (ta[1:]-ta[:-1]).mean() + else: + dt = (ta[:,1]-ta[:,0]).mean() + if np.allclose(dt,30,atol=3): # monthly + + tmid = np.copy(ta) + if tmid.ndim > 1: tmid = ta.mean(axis=1) + + # Determine the month index by finding to which mid_month day + # the middle time point is closest. + def _dpyShift(tmid,ta,dpy): + yrs = np.floor((tmid / float(dpy)))*365. 
+ ind = np.abs((tmid % float(dpy))[:,np.newaxis]-mid_months).argmin(axis=1) + if ta.ndim == 1: + ta = yrs + mid_months[ind] + if ta.ndim == 2: + ta[:,0] = yrs + bnd_months[ind] + ta[:,1] = yrs + bnd_months[ind+1] + return ta + if calendar == "360_day": + ta = _dpyShift(tmid,ta,360) + elif calendar == "366_day": + ta = _dpyShift(tmid,ta,366) + elif calendar in ["365_day","noleap"]: + ta = _dpyShift(tmid,ta,365) + elif calendar in ["proleptic_gregorian","gregorian","standard","julian"]: + # we can use datetime to get the Julian day and then find + # how these line up with mid_months + tmid = num2date(tmid,"days since 1850-1-1",calendar=calendar) + yrs = [float(t.year-1850)*365. for t in tmid] + tmid = [float(t.timetuple().tm_yday) for t in tmid] + tmid = np.asarray(tmid) + ind = np.abs(tmid[:,np.newaxis]-mid_months).argmin(axis=1) + if ta.ndim == 1: + ta = yrs + mid_months[ind] + if ta.ndim == 2: + ta[:,0] = yrs + bnd_months[ind] + ta[:,1] = yrs + bnd_months[ind+1] + else: + raise ValueError("Unsupported calendar: %s" % calendar) + + if tbnd is None: return ta + t = ta.mean(axis=1) + return t,ta + +def CellAreas(lat,lon,lat_bnds=None,lon_bnds=None): + """Given arrays of latitude and longitude, return cell areas in square meters. + + Parameters + ---------- + lat : numpy.ndarray + a 1D array of latitudes which represent cell centroids + lon : numpy.ndarray + a 1D array of longitudes which represent cell centroids + + Returns + ------- + areas : numpy.ndarray + a 2D array of cell areas in [m2] + """ + from constants import earth_rad + + if (lat_bnds is not None and lon_bnds is not None): + return earth_rad**2*np.outer((np.sin(lat_bnds[:,1]*np.pi/180.)- + np.sin(lat_bnds[:,0]*np.pi/180.)), + (lon_bnds[:,1]-lon_bnds[:,0])*np.pi/180.) + + x = np.zeros(lon.size+1) + x[1:-1] = 0.5*(lon[1:]+lon[:-1]) + x[ 0] = lon[ 0]-0.5*(lon[ 1]-lon[ 0]) + x[-1] = lon[-1]+0.5*(lon[-1]-lon[-2]) + if(x.max() > 181): x -= 180 + x = x.clip(-180,180) + x *= np.pi/180. + + y = np.zeros(lat.size+1) + y[1:-1] = 0.5*(lat[1:]+lat[:-1]) + y[ 0] = lat[ 0]-0.5*(lat[ 1]-lat[ 0]) + y[-1] = lat[-1]+0.5*(lat[-1]-lat[-2]) + y = y.clip(-90,90) + y *= np.pi/180. + + dx = earth_rad*(x[1:]-x[:-1]) + dy = earth_rad*(np.sin(y[1:])-np.sin(y[:-1])) + areas = np.outer(dx,dy).T + + return areas + +def GlobalLatLonGrid(res,**keywords): + r"""Generates a latitude/longitude grid at a desired resolution + + Computes 1D arrays of latitude and longitude values which + correspond to cell interfaces and centroids at a given resolution. 
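+
+    For example, res=1.0 gives 181 latitude and 361 longitude interface values
+    spanning [-90,90] and [-180,180] (or [0,360] when from_zero=True), with
+    180x360 cell centroids.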
+ + Parameters + ---------- + res : float + the desired resolution of the grid in degrees + from_zero : boolean + sets longitude convention { True:(0,360), False:(-180,180) } + + Returns + ------- + lat_bnd : numpy.ndarray + a 1D array of latitudes which represent cell interfaces + lon_bnd : numpy.ndarray + a 1D array of longitudes which represent cell interfaces + lat : numpy.ndarray + a 1D array of latitudes which represent cell centroids + lon : numpy.ndarray + a 1D array of longitudes which represent cell centroids + """ + from_zero = keywords.get("from_zero",False) + res_lat = keywords.get("res_lat",res) + res_lon = keywords.get("res_lon",res) + nlon = int(360./res_lon)+1 + nlat = int(180./res_lat)+1 + lon_bnd = np.linspace(-180,180,nlon) + if from_zero: lon_bnd += 180 + lat_bnd = np.linspace(-90,90,nlat) + lat = 0.5*(lat_bnd[1:]+lat_bnd[:-1]) + lon = 0.5*(lon_bnd[1:]+lon_bnd[:-1]) + return lat_bnd,lon_bnd,lat,lon + +def NearestNeighborInterpolation(lat1,lon1,data1,lat2,lon2): + r"""Interpolates globally grided data at another resolution + + Parameters + ---------- + lat1 : numpy.ndarray + a 1D array of latitudes of cell centroids corresponding to the + source data + lon1 : numpy.ndarray + a 1D array of longitudes of cell centroids corresponding to the + source data + data1 : numpy.ndarray + an array of data to be interpolated of shape = (lat1.size,lon1.size,...) + lat2 : numpy.ndarray + a 1D array of latitudes of cell centroids corresponding to the + target resolution + lon2 : numpy.ndarray + a 1D array of longitudes of cell centroids corresponding to the + target resolution + + Returns + ------- + data2 : numpy.ndarray + an array of interpolated data of shape = (lat2.size,lon2.size,...) + """ + rows = np.apply_along_axis(np.argmin,1,np.abs(lat2[:,np.newaxis]-lat1)) + cols = np.apply_along_axis(np.argmin,1,np.abs(lon2[:,np.newaxis]-lon1)) + data2 = data1[np.ix_(rows,cols)] + return data2 + +def TrueError(lat1_bnd,lon1_bnd,lat1,lon1,data1,lat2_bnd,lon2_bnd,lat2,lon2,data2): + r"""Computes the pointwise difference between two sets of gridded data + + To obtain the pointwise error we populate a list of common cell + interfaces and then interpolate both input arrays to the composite + grid resolution using nearest-neighbor interpolation. + + Parameters + ---------- + lat1_bnd, lon1_bnd, lat1, lon1 : numpy.ndarray + 1D arrays corresponding to the latitude/longitudes of the cell + interfaces/centroids + data1 : numpy.ndarray + an array of data to be interpolated of shape = (lat1.size,lon1.size,...) + lat2_bnd, lon2_bnd, lat2, lon2 : numpy.ndarray + 1D arrays corresponding to the latitude/longitudes of the cell + interfaces/centroids + data2 : numpy.ndarray + an array of data to be interpolated of shape = (lat2.size,lon2.size,...) + + Returns + ------- + lat_bnd, lon_bnd, lat, lon : numpy.ndarray + 1D arrays corresponding to the latitude/longitudes of the cell + interfaces/centroids of the resulting error + error : numpy array + an array of the pointwise error of shape = (lat.size,lon.size,...) 
+    """
+    # combine limits, sort and remove duplicates
+    lat_bnd = np.hstack((lat1_bnd,lat2_bnd)); lat_bnd.sort(); lat_bnd = np.unique(lat_bnd)
+    lon_bnd = np.hstack((lon1_bnd,lon2_bnd)); lon_bnd.sort(); lon_bnd = np.unique(lon_bnd)
+
+    # need centroids of new grid for nearest-neighbor interpolation
+    lat = 0.5*(lat_bnd[1:]+lat_bnd[:-1])
+    lon = 0.5*(lon_bnd[1:]+lon_bnd[:-1])
+
+    # interpolate datasets at new grid
+    d1 = NearestNeighborInterpolation(lat1,lon1,data1,lat,lon)
+    d2 = NearestNeighborInterpolation(lat2,lon2,data2,lat,lon)
+
+    # relative to the first grid/data
+    error = d2-d1
+    return lat_bnd,lon_bnd,lat,lon,error
+
+def SympifyWithArgsUnits(expression,args,units):
+    """Uses symbolic algebra to determine the final unit of an expression.
+
+    Parameters
+    ----------
+    expression : str
+        the expression whose units you wish to simplify
+    args : dict
+        a dictionary of numpy arrays whose keys are the
+        variables written in the input expression
+    units : dict
+        a dictionary of strings representing units whose keys are the
+        variables written in the input expression
+
+    """
+    from sympy import sympify,postorder_traversal
+
+    expression = sympify(expression)
+
+    # try to convert all arguments to same units if possible, it
+    # catches most use cases
+    keys = args.keys()
+    for i,key0 in enumerate(keys):
+        for key in keys[(i+1):]:
+            try:
+                Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True)
+                units[key] = units[key0]
+            except:
+                pass
+
+    for expr in postorder_traversal(expression):
+        ekey = str(expr)
+        if expr.is_Add:
+
+            # if there are scalars in the expression, these will not
+            # be in the units dictionary. Add them and give them an
+            # implicit unit of 1
+            keys = [str(arg) for arg in expr.args]
+            for key in keys:
+                if not units.has_key(key): units[key] = "1"
+
+            # if we are adding, all arguments must have the same unit.
+            key0 = keys[0]
+            for key in keys:
+                Unit(units[key]).convert(np.ones(1),Unit(units[key0]))
+                units[key] = units[key0]
+            units[ekey] = "%s" % (units[key0])
+
+        elif expr.is_Pow:
+
+            # if raising to a power, just create the new unit
+            keys = [str(arg) for arg in expr.args]
+            units[ekey] = "(%s)%s" % (units[keys[0]],keys[1])
+
+        elif expr.is_Mul:
+
+            # just create the new unit
+            keys = [str(arg) for arg in expr.args]
+            units[ekey] = " ".join(["(%s)" % units[key] for key in keys if units.has_key(key)])
+    return sympify(str(expression),locals=args),units[ekey]
+
+
+def ComputeIndexingArrays(lat2d,lon2d,lat,lon):
+    """Maps a regular 1D lat/lon grid onto a curvilinear 2D lat/lon grid.
+
+    Returns, for each target (lat,lon) cell, the (row,column) indices of
+    the nearest source cell in the 2D coordinate arrays.
+
+    Parameters
+    ----------
+    lat2d : numpy.ndarray
+        A 2D array of latitudes of the source cell centroids
+    lon2d : numpy.ndarray
+        A 2D array of longitudes of the source cell centroids
+    lat : numpy.ndarray
+        A 1D array of latitudes of cell centroids
+    lon : numpy.ndarray
+        A 1D array of longitudes of cell centroids
+
+    """
+    # Prepare the interpolator
+    points = np.asarray([lat2d.flatten(),lon2d.flatten()]).T
+    values = np.asarray([(np.arange(lat2d.shape[0])[:,np.newaxis]*np.ones (lat2d.shape[1])).flatten(),
+                         (np.ones (lat2d.shape[0])[:,np.newaxis]*np.arange(lat2d.shape[1])).flatten()]).T
+    fcn = NearestNDInterpolator(points,values)
+    LAT,LON = np.meshgrid(lat,lon,indexing='ij')
+    gmap = fcn(LAT.flatten(),LON.flatten()).astype(int)
+    return gmap[:,0].reshape(LAT.shape),gmap[:,1].reshape(LAT.shape)
+
+def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=None):
+    """Extracts data from a netCDF4 datafile for use in a Variable object.
+
+    Intended to be used inside of the Variable constructor. Some of
+    the return arguments will be None depending on the contents of the
+    netCDF4 file.
+ + Parameters + ---------- + filename : str + Name of the netCDF4 file from which to extract a variable + variable_name : str + Name of the variable to extract from the netCDF4 file + alternate_vars : list of str, optional + A list of possible alternate variable names to find + t0 : float, optional + If temporal, specifying the initial time can reduce memory + usage and speed up access time. + tf : float, optional + If temporal, specifying the final time can reduce memory + usage and speed up access time. + + Returns + ------- + data : numpy.ma.masked_array + The array which contains the data which constitutes the variable + unit : str + The unit of the input data + name : str + The name of the variable, will be how it is saved in an output netCDF4 file + time : numpy.ndarray + A 1D array of times in days since 1850-01-01 00:00:00 + time_bnds : numpy.ndarray + A 1D array of time bounds in days since 1850-01-01 00:00:00 + lat : numpy.ndarray + A 1D array of latitudes of cell centroids + lon : numpy.ndarray + A 1D array of longitudes of cell centroids + area : numpy.ndarray + A 2D array of the cell areas + ndata : int + Number of data sites this data represents + depth_bnds : numpy.ndarray + A 1D array of the depth boundaries of each cell + """ + try: + dset = Dataset(filename,mode="r") + if group is None: + grp = dset + else: + grp = dset.groups[group] + except RuntimeError: + raise RuntimeError("Unable to open the file: %s" % filename) + + found = False + if variable_name in grp.variables.keys(): + found = True + var = grp.variables[variable_name] + else: + while alternate_vars.count(None) > 0: alternate_vars.pop(alternate_vars.index(None)) + for var_name in alternate_vars: + if var_name in grp.variables.keys(): + found = True + var = grp.variables[var_name] + if found == False: + alternate_vars.insert(0,variable_name) + raise RuntimeError("Unable to find [%s] in the file: %s" % (",".join(alternate_vars),filename)) + + # Check on dimensions + time_name = [name for name in var.dimensions if "time" in name.lower()] + lat_name = [name for name in var.dimensions if "lat" in name.lower()] + lon_name = [name for name in var.dimensions if "lon" in name.lower()] + data_name = [name for name in var.dimensions if "data" in name.lower()] + missed = [name for name in var.dimensions if name not in (time_name + + lat_name + + lon_name + + data_name)] + + # Lat/lon might be indexing arrays, find their shape + shp = None + if (len(lat_name) == 0 and len(lon_name) == 0 and len(missed) >= 2 and len(data_name) == 0): + # remove these dimensions from the missed variables + i,j = var.dimensions[-2],var.dimensions[-1] + if i in missed: missed.pop(missed.index(i)) + if j in missed: missed.pop(missed.index(j)) + i = grp.variables[i] + j = grp.variables[j] + if (np.issubdtype(i.dtype,np.integer) and + np.issubdtype(j.dtype,np.integer)): shp = [len(i),len(j)] + + # Lat/lon might just be sizes + if (len(lat_name) == 1 and len(lon_name) == 1): + if not (lat_name[0] in grp.variables and lon_name[0] in grp.variables): + shp = [len(grp.dimensions[lat_name[0]]),len(grp.dimensions[lon_name[0]])] + + # If these were sizes, then we need to find the correct 2D lat/lon arrays + if shp is not None: + + # We want to remove any false positives we might find. I don't + # want to consider variables which are 'bounds' or dimensions + # of others, nor those that don't have the correct shape. 
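+        # Any remaining candidates are narrowed below by name ('lat'/'lon')
+        # and, if still ambiguous, by whether the variable's attributes
+        # mention the candidate name.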
+ bnds = [grp.variables[v].bounds for v in grp.variables if "bounds" in grp.variables[v].ncattrs()] + dims = [v for v in grp.variables if (v in grp.dimensions)] + poss = [v for v in grp.variables if (v not in dims and + v not in bnds and + np.allclose(shp,grp.variables[v].shape) if len(shp) == len(grp.variables[v].shape) else False)] + lat_name = [name for name in poss if "lat" in name.lower()] + lon_name = [name for name in poss if "lon" in name.lower()] + + # If still ambiguous, look inside the variable attributes for + # the presence of the variable name to give further + # preference. + attrs = [str(var.getncattr(attr)) for attr in var.ncattrs()] + if len(lat_name) == 0: raise ValueError("Unable to find values for the latitude dimension in %s" % (filename)) + if len(lat_name) > 1: + tmp_name = [name for name in lat_name if np.any([name in attr for attr in attrs])] + if len(tmp_name) > 0: lat_name = tmp_name + if len(lon_name) == 0: raise ValueError("Unable to find values for the longitude dimension in %s" % (filename)) + if len(lon_name) > 1: + tmp_name = [name for name in lon_name if np.any([name in attr for attr in attrs])] + if len(tmp_name) > 0: lon_name = tmp_name + + # Time dimension + if len(time_name) == 1: + time_name = time_name[0] + time_bnd_name = grp.variables[time_name].bounds if (time_name in grp.variables and + "bounds" in grp.variables[time_name].ncattrs()) else None + if time_bnd_name not in grp.variables: time_bnd_name = None + elif len(time_name) >= 1: + raise ValueError("Ambiguous choice of values for the time dimension [%s] in %s" % (",".join(time_name),filename)) + else: + time_name = None + time_bnd_name = None + + # Lat dimension + if len(lat_name) == 1: + lat_name = lat_name[0] + lat_bnd_name = grp.variables[lat_name].bounds if (lat_name in grp.variables and + "bounds" in grp.variables[lat_name].ncattrs()) else None + if lat_bnd_name not in grp.variables: lat_bnd_name = None + elif len(lat_name) >= 1: + raise ValueError("Ambiguous choice of values for the latitude dimension [%s] in %s" % (",".join(lat_name),filename)) + else: + lat_name = None + lat_bnd_name = None + + # Lon dimension + if len(lon_name) == 1: + lon_name = lon_name[0] + lon_bnd_name = grp.variables[lon_name].bounds if (lon_name in grp.variables and + "bounds" in grp.variables[lon_name].ncattrs()) else None + if lon_bnd_name not in grp.variables: lon_bnd_name = None + elif len(lon_name) >= 1: + raise ValueError("Ambiguous choice of values for the longitude dimension [%s] in %s" % (",".join(lon_name),filename)) + else: + lon_name = None + lon_bnd_name = None + + # Data dimension + if len(data_name) == 1: + data_name = data_name[0] + elif len(data_name) >= 1: + raise ValueError("Ambiguous choice of values for the data dimension [%s] in %s" % (",".join(data_name),filename)) + else: + data_name = None + + # The layered dimension is whatever is leftover since its name + # could be many things + if len(missed) == 1: + depth_name = missed[0] + depth_bnd_name = grp.variables[depth_name].bounds if (depth_name in grp.variables and + "bounds" in grp.variables[depth_name].ncattrs()) else None + if depth_bnd_name not in grp.variables: depth_bnd_name = None + elif len(missed) >= 1: + raise ValueError("Ambiguous choice of values for the layered dimension [%s] in %s" % (",".join(missed),filename)) + else: + depth_name = None + depth_bnd_name = None + + # Based on present values, get dimensions and bounds + t = None; t_bnd = None + lat = None; lat_bnd = None + lon = None; lon_bnd = None + depth = None; depth_bnd 
= None + data = None; + cbounds = None + if time_name is not None: + if time_bnd_name is None: + t = ConvertCalendar(grp.variables[time_name]) + else: + t,t_bnd = ConvertCalendar(grp.variables[time_name],grp.variables[time_bnd_name]) + if "climatology" in grp.variables[time_name].ncattrs(): + cbounds = grp.variables[grp.variables[time_name].climatology] + if not np.allclose(cbounds.shape,[12,2]): + raise RuntimeError("ILAMB only supports annual cycle style climatologies") + cbounds = np.round(cbounds[0,:]/365.+1850.) + if lat_name is not None: lat = grp.variables[lat_name] [...] + if lat_bnd_name is not None: lat_bnd = grp.variables[lat_bnd_name] [...] + if lon_name is not None: lon = grp.variables[lon_name] [...] + if lon_bnd_name is not None: lon_bnd = grp.variables[lon_bnd_name] [...] + if depth_name is not None: + dunit = None + if "units" in grp.variables[depth_name].ncattrs(): dunit = grp.variables[depth_name].units + depth = grp.variables[depth_name][...] + if depth_bnd_name is not None: + depth_bnd = grp.variables[depth_bnd_name][...] + if dunit is not None: + if not Unit(dunit).is_convertible(Unit("m")): + raise ValueError("Non-linear units [%s] of the layered dimension [%s] in %s" % (dunit,depth_name,filename)) + depth = Unit(dunit).convert(depth,Unit("m"),inplace=True) + if depth_bnd is not None: + depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) + + if data_name is not None: + data = len(grp.dimensions[data_name]) + # if we have data sites, there may be lat/lon data to come + # along with them although not a dimension of the variable + for key in grp.variables.keys(): + if "lat" in key: lat_name = key + if "lon" in key: lon_name = key + if lat_name is not None: lat = grp.variables[lat_name][...] + if lon_name is not None: lon = grp.variables[lon_name][...] + if lat.size != data: lat = None + if lon.size != data: lon = None + + # read in data array, roughly subset in time if bounds are + # provided for added effciency + if (t is not None) and (t0 is not None or tf is not None): + begin = 0; end = t.size + if t0 is not None: begin = max(t.searchsorted(t0)-1,begin) + if tf is not None: end = min(t.searchsorted(tf)+1,end) + v = var[begin:end,...] + t = t [begin:end] + if t_bnd is not None: + t_bnd = t_bnd[begin:end,:] + else: + v = var[...] 
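+    # Note: when subsetting in time above, begin and end are padded by one
+    # sample on each side so that the requested [t0,tf] window is fully
+    # covered even if t0 or tf falls between time samples.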
+ + # If lat and lon are 2D, then we will need to interpolate things + if lat is not None and lon is not None: + if lat.ndim == 2 and lon.ndim == 2: + assert lat.shape == lon.shape + + # Create the grid + res = 1.0 + lat_bnds = np.arange(round(lat.min(),0), + round(lat.max(),0)+res/2.,res) + lon_bnds = np.arange(round(lon.min(),0), + round(lon.max(),0)+res/2.,res) + lats = 0.5*(lat_bnds[:-1]+lat_bnds[1:]) + lons = 0.5*(lon_bnds[:-1]+lon_bnds[1:]) + ilat,ilon = ComputeIndexingArrays(lat,lon,lats,lons) + r = np.sqrt( (lat[ilat,ilon]-lats[:,np.newaxis])**2 + + (lon[ilat,ilon]-lons[np.newaxis,:])**2 ) + v = v[...,ilat,ilon] + v = np.ma.masked_array(v,mask=v.mask+(r>2*res)) + lat = lats + lon = lons + lat_bnd = np.zeros((lat.size,2)) + lat_bnd[:,0] = lat_bnds[:-1] + lat_bnd[:,1] = lat_bnds[+1:] + lon_bnd = lon_bnds + lon_bnd = np.zeros((lon.size,2)) + lon_bnd[:,0] = lon_bnds[:-1] + lon_bnd[:,1] = lon_bnds[+1:] + + # handle incorrect or absent masking of arrays + if type(v) != type(np.ma.empty(1)): + mask = np.zeros(v.shape,dtype=int) + if "_FillValue" in var.ncattrs(): mask += (np.abs(v-var._FillValue )<1e-12) + if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12) + v = np.ma.masked_array(v,mask=mask,copy=False) + + if "units" in var.ncattrs(): + units = FixDumbUnits(var.units) + else: + units = "1" + dset.close() + + return v,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data + +def Score(var,normalizer): + """Remaps a normalized variable to the interval [0,1]. + + Parameters + ---------- + var : ILAMB.Variable.Variable + The variable to normalize, usually represents an error of some sort + normalizer : ILAMB.Variable.Variable + The variable by which we normalize + """ + from Variable import Variable + name = var.name.replace("bias","bias_score") + name = name.replace("diff","diff_score") + name = name.replace("rmse","rmse_score") + name = name.replace("iav" ,"iav_score") + np.seterr(over='ignore',under='ignore') + data = np.exp(-np.abs(var.data/normalizer.data)) + data[data<1e-16] = 0. + np.seterr(over='raise',under='raise') + return Variable(name = name, + data = data, + unit = "1", + ndata = var.ndata, + lat = var.lat, lat_bnds = var.lat_bnds, + lon = var.lon, lon_bnds = var.lon_bnds, + area = var.area) + +def ComposeSpatialGrids(var1,var2): + """Creates a grid which conforms the boundaries of both variables. + + This routine takes the union of the latitude and longitude + cell boundaries of both grids and returns a new set of + latitudes and longitudes which represent cell centers of the + new grid. 
+
+    Parameters
+    ----------
+    var1,var2 : ILAMB.Variable.Variable
+        The two variables for which we wish to find a common grid
+
+    Returns
+    -------
+    lat : numpy.ndarray
+        a 1D array of latitudes of cell centroids
+    lon : numpy.ndarray
+        a 1D array of longitudes of cell centroids
+    """
+    if not var1.spatial: raise NotSpatialVariable()
+    if not var2.spatial: raise NotSpatialVariable()
+    def _make_bnds(x):
+        bnds = np.zeros(x.size+1)
+        bnds[1:-1] = 0.5*(x[1:]+x[:-1])
+        bnds[ 0] = max(x[ 0]-0.5*(x[ 1]-x[ 0]),-180)
+        bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180)
+        return bnds
+    lat1_bnd = _make_bnds(var1.lat)
+    lon1_bnd = _make_bnds(var1.lon)
+    lat2_bnd = _make_bnds(var2.lat)
+    lon2_bnd = _make_bnds(var2.lon)
+    lat_bnd = np.hstack((lat1_bnd,lat2_bnd)); lat_bnd.sort(); lat_bnd = np.unique(lat_bnd)
+    lon_bnd = np.hstack((lon1_bnd,lon2_bnd)); lon_bnd.sort(); lon_bnd = np.unique(lon_bnd)
+    lat = 0.5*(lat_bnd[1:]+lat_bnd[:-1])
+    lon = 0.5*(lon_bnd[1:]+lon_bnd[:-1])
+    return lat,lon
+
+def ScoreSeasonalCycle(phase_shift):
+    """Computes the seasonal cycle score from the phase shift.
+
+    Possibly remove this function, as we do not compute other score
+    components via an ilamblib function.
+    """
+    from Variable import Variable
+    return Variable(data  = (1+np.cos(np.abs(phase_shift.data)/365*2*np.pi))*0.5,
+                    unit  = "1",
+                    name  = phase_shift.name.replace("phase_shift","phase_shift_score"),
+                    ndata = phase_shift.ndata,
+                    lat   = phase_shift.lat, lat_bnds = phase_shift.lat_bnds,
+                    lon   = phase_shift.lon, lon_bnds = phase_shift.lon_bnds,
+                    area  = phase_shift.area)
+
+def _composeGrids(v1,v2):
+    lat_bnds = np.unique(np.hstack([v1.lat_bnds.flatten(),v2.lat_bnds.flatten()]))
+    lon_bnds = np.unique(np.hstack([v1.lon_bnds.flatten(),v2.lon_bnds.flatten()]))
+    lat_bnds = lat_bnds[(lat_bnds>=- 90)*(lat_bnds<=+ 90)]
+    lon_bnds = lon_bnds[(lon_bnds>=-180)*(lon_bnds<=+180)]
+    lat_bnds = np.vstack([lat_bnds[:-1],lat_bnds[+1:]]).T
+    lon_bnds = np.vstack([lon_bnds[:-1],lon_bnds[+1:]]).T
+    lat = lat_bnds.mean(axis=1)
+    lon = lon_bnds.mean(axis=1)
+    return lat,lon,lat_bnds,lon_bnds
+
+def AnalysisMeanStateSites(ref,com,**keywords):
+    """Perform a mean state analysis.
+
+    This mean state analysis examines the model mean state in space
+    and time. We compute the mean variable value over the time period
+    at each spatial cell or data site as appropriate, as well as the
+    bias and RMSE relative to the observational variable. We will
+    output maps of the period mean values and bias. For each spatial
+    cell or data site we also estimate the phase of the variable by
+    finding the mean time of year when the maximum occurs and the
+    phase shift by computing the difference in phase with respect to
+    the observational variable. In the spatial dimension, we compute a
+    spatial mean for each of the desired regions and an average annual
+    cycle.
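+
+    All maps and scalars computed here are optionally cached to the provided
+    netCDF4 datasets under a "MeanState" group.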
+ + Parameters + ---------- + obs : ILAMB.Variable.Variable + the observational (reference) variable + mod : ILAMB.Variable.Variable + the model (comparison) variable + regions : list of str, optional + the regions overwhich to apply the analysis + dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the model + benchmark_dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the observations + space_mean : bool, optional + disable to compute sums of the variable over space instead of + mean values + table_unit : str, optional + the unit to use when displaying output in tables on the HTML page + plots_unit : str, optional + the unit to use when displaying output on plots on the HTML page + + """ + + from Variable import Variable + regions = keywords.get("regions" ,["global"]) + dataset = keywords.get("dataset" ,None) + benchmark_dataset = keywords.get("benchmark_dataset",None) + space_mean = keywords.get("space_mean" ,True) + table_unit = keywords.get("table_unit" ,None) + plot_unit = keywords.get("plot_unit" ,None) + mass_weighting = keywords.get("mass_weighting" ,False) + skip_rmse = keywords.get("skip_rmse" ,False) + skip_iav = keywords.get("skip_iav" ,False) + skip_cycle = keywords.get("skip_cycle" ,False) + ILAMBregions = Regions() + spatial = False + normalizer = None + + # Only study the annual cycle if it makes sense + if not ref.monthly: skip_cycle = True + if ref.time.size < 12: skip_cycle = True + if skip_rmse : skip_iav = True + + if spatial: + lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) + REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + + # We find the mean values over the time period on the original + # grid/datasites of each dataset + ref_timeint = ref.integrateInTime(mean=True) + com_timeint = com.integrateInTime(mean=True) + if spatial: + + REF_timeint = REF.integrateInTime(mean=True) + COM_timeint = COM.integrateInTime(mean=True) + + # Masks + ref_mask = REF_timeint.data.mask + com_mask = COM_timeint.data.mask + ref_and_com = (ref_mask == False) * (com_mask == False) + ref_not_com = (ref_mask == False) * (com_mask == True ) + com_not_ref = (ref_mask == True ) * (com_mask == False) + ref_and_COM = Variable(name = "ref_and_COM", unit = ref.unit, + data = np.ma.masked_array(COM_timeint.data,mask=(ref_and_com==False)), + lat = lat, lat_bnds = lat_bnds, + lon = lon, lon_bnds = lon_bnds, + area = COM_timeint.area) + COM_not_ref = Variable(name = "COM_not_ref", unit = ref.unit, + data = np.ma.masked_array(COM_timeint.data,mask=(com_not_ref==False)), + lat = lat, lat_bnds = lat_bnds, + lon = lon, lon_bnds = lon_bnds, + area = COM_timeint.area) + REF_and_com = Variable(name = "REF_and_com", unit = REF.unit, + data = np.ma.masked_array(REF_timeint.data,mask=(ref_and_com==False)), + lat = lat, lat_bnds = lat_bnds, + lon = lon, lon_bnds = lon_bnds, + area = REF_timeint.area) + REF_not_com = Variable(name = "REF_not_com", unit = REF.unit, + data = np.ma.masked_array(REF_timeint.data,mask=(ref_not_com==False)), + lat = lat, lat_bnds = lat_bnds, + lon = lon, lon_bnds = lon_bnds, + area = REF_timeint.area) + + # Apply intersection mask + REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) + COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) + 
REF_timeint.data.mask = (ref_and_com==False) + COM_timeint.data.mask = (ref_and_com==False) + + else: + + REF = ref + COM = com + REF_timeint = ref_timeint + COM_timeint = com_timeint + if mass_weighting: normalizer = REF_timeint.data + + # Compute the bias, RMSE, and RMS maps using the interpolated + # quantities + bias = REF_timeint.bias(COM_timeint) + cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata) + crms = cREF.rms () + bias_score_map = Score(bias,crms) + if spatial: + bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + if not skip_rmse: + cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata) + rmse = REF.rmse( COM) + crmse = cREF.rmse(cCOM) + rmse_score_map = Score(crmse,crms) + if not skip_iav: + ref_iav = Variable(name = "centralized %s" % ref.name, unit = ref.unit, + data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask), + time = ref.time, time_bnds = ref.time_bnds, + lat = ref.lat , lat_bnds = ref.lat_bnds, + lon = ref.lon , lon_bnds = ref.lon_bnds, + area = ref.area, ndata = ref.ndata).rms() + com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit, + data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask), + time = com.time, time_bnds = com.time_bnds, + lat = com.lat , lat_bnds = com.lat_bnds, + lon = com.lon , lon_bnds = com.lon_bnds, + area = com.area, ndata = com.ndata).rms() + REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata).rms() + COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata).rms() + iav_score_map = Score(Variable(name = "diff %s" % REF.name, unit = REF.unit, + data = (COM_iav.data-REF_iav.data), + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata), + REF_iav) + + # The phase shift comes from the interpolated quantities + if not skip_cycle: + ref_cycle = REF.annualCycle() + com_cycle = COM.annualCycle() + ref_maxt_map = ref_cycle.timeOfExtrema(etype="max") + com_maxt_map = com_cycle.timeOfExtrema(etype="max") + shift_map = ref_maxt_map.phaseShift(com_maxt_map) + shift_score_map = ScoreSeasonalCycle(shift_map) + shift_map.data /= 30.; shift_map.unit = "months" + + # Scalars + ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {} + com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {} + bias_val = {}; bias_score = {}; 
rmse_val = {}; rmse_score = {} + space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {} + ref_union_mean = {}; ref_comp_mean = {} + com_union_mean = {}; com_comp_mean = {} + for region in regions: + if spatial: + ref_period_mean[region] = ref_timeint .integrateInSpace(region=region,mean=space_mean) + ref_union_mean [region] = REF_and_com .integrateInSpace(region=region,mean=space_mean) + com_union_mean [region] = ref_and_COM .integrateInSpace(region=region,mean=space_mean) + ref_comp_mean [region] = REF_not_com .integrateInSpace(region=region,mean=space_mean) + com_comp_mean [region] = COM_not_ref .integrateInSpace(region=region,mean=space_mean) + ref_spaceint [region] = REF .integrateInSpace(region=region,mean=True) + com_period_mean[region] = com_timeint .integrateInSpace(region=region,mean=space_mean) + com_spaceint [region] = COM .integrateInSpace(region=region,mean=True) + bias_val [region] = bias .integrateInSpace(region=region,mean=True) + bias_score [region] = bias_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) + if not skip_cycle: + ref_mean_cycle[region] = ref_cycle .integrateInSpace(region=region,mean=True) + ref_dtcycle [region] = deepcopy(ref_mean_cycle[region]) + ref_dtcycle [region].data -= ref_mean_cycle[region].data.mean() + com_mean_cycle[region] = com_cycle .integrateInSpace(region=region,mean=True) + com_dtcycle [region] = deepcopy(com_mean_cycle[region]) + com_dtcycle [region].data -= com_mean_cycle[region].data.mean() + shift [region] = shift_map .integrateInSpace(region=region,mean=True,intabs=True) + shift_score [region] = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + if not skip_rmse: + rmse_val [region] = rmse .integrateInSpace(region=region,mean=True) + rmse_score [region] = rmse_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) + space_std[region],space_cor[region],sd_score[region] = REF_timeint.spatialDistribution(COM_timeint,region=region) + else: + ref_period_mean[region] = ref_timeint .siteStats(region=region) + ref_spaceint [region] = ref .siteStats(region=region) + com_period_mean[region] = com_timeint .siteStats(region=region) + com_spaceint [region] = com .siteStats(region=region) + bias_val [region] = bias .siteStats(region=region) + bias_score [region] = bias_score_map .siteStats(region=region,weight=normalizer) + if not skip_cycle: + ref_mean_cycle [region] = ref_cycle .siteStats(region=region) + ref_dtcycle [region] = deepcopy(ref_mean_cycle[region]) + ref_dtcycle [region].data -= ref_mean_cycle[region].data.mean() + com_mean_cycle [region] = com_cycle .siteStats(region=region) + com_dtcycle [region] = deepcopy(com_mean_cycle[region]) + com_dtcycle [region].data -= com_mean_cycle[region].data.mean() + shift [region] = shift_map .siteStats(region=region,intabs=True) + shift_score [region] = shift_score_map.siteStats(region=region,weight=normalizer) + if not skip_rmse: + rmse_val [region] = rmse .siteStats(region=region) + rmse_score [region] = rmse_score_map .siteStats(region=region,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer) + + ref_period_mean[region].name = "Period Mean (original grids) %s" % (region) + ref_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) + com_period_mean[region].name = "Period Mean (original grids) %s" % 
(region) + com_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) + bias_val [region].name = "Bias %s" % (region) + bias_score [region].name = "Bias Score %s" % (region) + if not skip_rmse: + rmse_val [region].name = "RMSE %s" % (region) + rmse_score [region].name = "RMSE Score %s" % (region) + if not skip_iav: + iav_score [region].name = "Interannual Variability Score %s" % (region) + if not skip_cycle: + ref_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) + ref_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) + com_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) + com_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) + shift [region].name = "Phase Shift %s" % (region) + shift_score [region].name = "Seasonal Cycle Score %s" % (region) + if spatial: + ref_union_mean[region].name = "Benchmark Period Mean (intersection) %s" % (region) + com_union_mean[region].name = "Model Period Mean (intersection) %s" % (region) + ref_comp_mean [region].name = "Benchmark Period Mean (complement) %s" % (region) + com_comp_mean [region].name = "Model Period Mean (complement) %s" % (region) + sd_score [region].name = "Spatial Distribution Score %s" % (region) + + # Unit conversions + def _convert(var,unit): + if type(var) == type({}): + for key in var.keys(): var[key].convert(unit) + else: + var.convert(unit) + + if table_unit is not None: + for var in [ref_period_mean,com_period_mean,ref_union_mean,com_union_mean,ref_comp_mean,com_comp_mean]: + _convert(var,table_unit) + if plot_unit is not None: + plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val] + if not skip_rmse: plot_vars += [rmse,rmse_val] + if not skip_cycle: plot_vars += [com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] + if not skip_iav: plot_vars += [com_iav] + for var in plot_vars: _convert(var,plot_unit) + + # Rename and optionally dump out information to netCDF4 files + com_timeint .name = "timeint_of_%s" % ref.name + bias .name = "bias_map_of_%s" % ref.name + bias_score_map .name = "biasscore_map_of_%s" % ref.name + + out_vars = [com_period_mean, + ref_union_mean, + com_union_mean, + ref_comp_mean, + com_comp_mean, + com_timeint, + com_mean_cycle, + com_dtcycle, + bias, + bias_score_map, + bias_val, + bias_score, + shift, + shift_score] + if com_spaceint[com_spaceint.keys()[0]].data.size > 1: out_vars.append(com_spaceint) + if not skip_cycle: + com_maxt_map .name = "phase_map_of_%s" % ref.name + shift_map .name = "shift_map_of_%s" % ref.name + shift_score_map.name = "shiftscore_map_of_%s" % ref.name + out_vars.append(com_maxt_map) + out_vars.append(shift_map) + out_vars.append(shift_score_map) + if not skip_rmse: + rmse .name = "rmse_map_of_%s" % ref.name + rmse_score_map.name = "rmsescore_map_of_%s" % ref.name + out_vars.append(rmse) + out_vars.append(rmse_score_map) + out_vars.append(rmse_val) + out_vars.append(rmse_score) + if not skip_iav: + com_iav.name = "iav_map_of_%s" % ref.name + iav_score_map.name = "iavscore_map_of_%s" % ref.name + out_vars.append(com_iav) + out_vars.append(iav_score_map) + out_vars.append(iav_score) + if dataset is not None: + for var in out_vars: + if type(var) == type({}): + for key in var.keys(): var[key].toNetCDF4(dataset,group="MeanState") + else: + var.toNetCDF4(dataset,group="MeanState") + for key in sd_score.keys(): + sd_score[key].toNetCDF4(dataset,group="MeanState", + attributes={"std":space_std[key].data, + "R" :space_cor[key].data}) + + # Rename and optionally dump out 
information to netCDF4 files + out_vars = [ref_period_mean,ref_timeint] + if ref_spaceint[ref_spaceint.keys()[0]].data.size > 1: out_vars.append(ref_spaceint) + ref_timeint .name = "timeint_of_%s" % ref.name + if not skip_cycle: + ref_maxt_map.name = "phase_map_of_%s" % ref.name + out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle] + if not skip_iav: + ref_iav.name = "iav_map_of_%s" % ref.name + out_vars.append(ref_iav) + if benchmark_dataset is not None: + for var in out_vars: + if type(var) == type({}): + for key in var.keys(): var[key].toNetCDF4(benchmark_dataset,group="MeanState") + else: + var.toNetCDF4(benchmark_dataset,group="MeanState") + + return + + +def AnalysisMeanStateSpace(ref,com,**keywords): + """Perform a mean state analysis. + + This mean state analysis examines the model mean state in space + and time. We compute the mean variable value over the time period + at each spatial cell or data site as appropriate, as well as the + bias and RMSE relative to the observational variable. We will + output maps of the period mean values and bias. For each spatial + cell or data site we also estimate the phase of the variable by + finding the mean time of year when the maximum occurs and the + phase shift by computing the difference in phase with respect to + the observational variable. In the spatial dimension, we compute a + spatial mean for each of the desired regions and an average annual + cycle. + + Parameters + ---------- + obs : ILAMB.Variable.Variable + the observational (reference) variable + mod : ILAMB.Variable.Variable + the model (comparison) variable + regions : list of str, optional + the regions overwhich to apply the analysis + dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the model + benchmark_dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the observations + space_mean : bool, optional + disable to compute sums of the variable over space instead of + mean values + table_unit : str, optional + the unit to use when displaying output in tables on the HTML page + plots_unit : str, optional + the unit to use when displaying output on plots on the HTML page + + """ + from Variable import Variable + regions = keywords.get("regions" ,["global"]) + dataset = keywords.get("dataset" ,None) + benchmark_dataset = keywords.get("benchmark_dataset",None) + space_mean = keywords.get("space_mean" ,True) + table_unit = keywords.get("table_unit" ,None) + plot_unit = keywords.get("plot_unit" ,None) + mass_weighting = keywords.get("mass_weighting" ,False) + skip_rmse = keywords.get("skip_rmse" ,False) + skip_iav = keywords.get("skip_iav" ,False) + skip_cycle = keywords.get("skip_cycle" ,False) + ILAMBregions = Regions() + spatial = ref.spatial + + # Convert str types to booleans + if type(skip_rmse) == type(""): + skip_rmse = (skip_rmse.lower() == "true") + if type(skip_iav ) == type(""): + skip_iav = (skip_iav .lower() == "true") + if type(skip_cycle) == type(""): + skip_cycle = (skip_cycle.lower() == "true") + + # Check if we need to skip parts of the analysis + if not ref.monthly : skip_cycle = True + if ref.time.size < 12: skip_cycle = True + if ref.time.size == 1: skip_rmse = True + if skip_rmse : skip_iav = True + name = ref.name + + # Interpolate both reference and comparison to a grid composed of + # their cell breaks + ref.convert(plot_unit) + com.convert(plot_unit) + lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) + 
REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + unit = REF.unit + area = REF.area + ndata = REF.ndata + + # Find the mean values over the time period + ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) + com_timeint = com.integrateInTime(mean=True).convert(plot_unit) + REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) + COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) + normalizer = REF_timeint.data if mass_weighting else None + + # Report period mean values over all possible representations of + # land + ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) + ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) + com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) + if benchmark_dataset is not None: + + ref_timeint.name = "timeint_of_%s" % name + ref_timeint.toNetCDF4(benchmark_dataset,group="MeanState") + for region in regions: + + # reference period mean on original grid + ref_period_mean = ref_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) + ref_period_mean.name = "Period Mean (original grids) %s" % region + ref_period_mean.toNetCDF4(benchmark_dataset,group="MeanState") + + if dataset is not None: + + com_timeint.name = "timeint_of_%s" % name + com_timeint.toNetCDF4(dataset,group="MeanState") + for region in regions: + + # reference period mean on intersection of land + ref_union_mean = Variable(name = "REF_and_com", unit = REF_timeint.unit, + data = np.ma.masked_array(REF_timeint.data,mask=(ref_and_com==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) + ref_union_mean.name = "Benchmark Period Mean (intersection) %s" % region + ref_union_mean.toNetCDF4(dataset,group="MeanState") + + # reference period mean on complement of land + ref_comp_mean = Variable(name = "REF_not_com", unit = REF_timeint.unit, + data = np.ma.masked_array(REF_timeint.data,mask=(ref_not_com==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = REF_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) + ref_comp_mean.name = "Benchmark Period Mean (complement) %s" % region + ref_comp_mean.toNetCDF4(dataset,group="MeanState") + + # comparison period mean on original grid + com_period_mean = com_timeint.integrateInSpace(region=region,mean=space_mean).convert(table_unit) + com_period_mean.name = "Period Mean (original grids) %s" % region + com_period_mean.toNetCDF4(dataset,group="MeanState") + + # comparison period mean on intersection of land + com_union_mean = Variable(name = "ref_and_COM", unit = COM_timeint.unit, + data = np.ma.masked_array(COM_timeint.data,mask=(ref_and_com==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) + com_union_mean.name = "Model Period Mean (intersection) %s" % region + com_union_mean.toNetCDF4(dataset,group="MeanState") + + # comparison period mean on complement of land + com_comp_mean = Variable(name = "COM_not_ref", unit = COM_timeint.unit, + data = np.ma.masked_array(COM_timeint.data,mask=(com_not_ref==False)), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = 
COM_timeint.area).integrateInSpace(region=region,mean=space_mean).convert(table_unit) + com_comp_mean.name = "Model Period Mean (complement) %s" % region + com_comp_mean.toNetCDF4(dataset,group="MeanState") + + # Now that we are done reporting on the intersection / complement, + # set all masks to the intersection + REF.data.mask += np.ones(REF.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) + COM.data.mask += np.ones(COM.time.size,dtype=bool)[:,np.newaxis,np.newaxis] * (ref_and_com==False) + REF_timeint.data.mask = (ref_and_com==False) + COM_timeint.data.mask = (ref_and_com==False) + if mass_weighting: normalizer.mask = (ref_and_com==False) + + # Spatial Distribution: scalars and scores + if dataset is not None: + for region in regions: + space_std,space_cor,sd_score = REF_timeint.spatialDistribution(COM_timeint,region=region) + sd_score.name = "Spatial Distribution Score %s" % region + sd_score.toNetCDF4(dataset,group="MeanState", + attributes={"std":space_std.data, + "R" :space_cor.data}) + + # Cycle: maps, scalars, and scores + if not skip_cycle: + ref_cycle = REF.annualCycle() + ref_maxt_map = ref_cycle.timeOfExtrema(etype="max") + ref_maxt_map.name = "phase_map_of_%s" % name + com_cycle = COM.annualCycle() + com_maxt_map = com_cycle.timeOfExtrema(etype="max") + com_maxt_map.name = "phase_map_of_%s" % name + shift_map = ref_maxt_map.phaseShift(com_maxt_map) + shift_map.name = "shift_map_of_%s" % name + shift_score_map = ScoreSeasonalCycle(shift_map) + shift_score_map.name = "shiftscore_map_of_%s" % name + shift_map.data /= 30.; shift_map.unit = "months" + if benchmark_dataset is not None: + ref_maxt_map.toNetCDF4(benchmark_dataset,group="MeanState") + for region in regions: + ref_mean_cycle = ref_cycle.integrateInSpace(region=region,mean=True) + ref_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) + ref_mean_cycle.toNetCDF4(benchmark_dataset,group="MeanState") + ref_dtcycle = deepcopy(ref_mean_cycle) + ref_dtcycle.data -= ref_mean_cycle.data.mean() + ref_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) + ref_dtcycle.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + com_maxt_map.toNetCDF4(dataset,group="MeanState") + shift_map .toNetCDF4(dataset,group="MeanState") + shift_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + com_mean_cycle = com_cycle.integrateInSpace(region=region,mean=True) + com_mean_cycle.name = "cycle_of_%s_over_%s" % (name,region) + com_mean_cycle.toNetCDF4(dataset,group="MeanState") + com_dtcycle = deepcopy(com_mean_cycle) + com_dtcycle.data -= com_mean_cycle.data.mean() + com_dtcycle.name = "dtcycle_of_%s_over_%s" % (name,region) + com_dtcycle.toNetCDF4(dataset,group="MeanState") + shift = shift_map.integrateInSpace(region=region,mean=True,intabs=True) + shift_score = shift_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + shift .name = "Phase Shift %s" % region + shift .toNetCDF4(dataset,group="MeanState") + shift_score.name = "Seasonal Cycle Score %s" % region + shift_score.toNetCDF4(dataset,group="MeanState") + + del ref_cycle,com_cycle,shift_map,shift_score_map + + # Bias: maps, scalars, and scores + bias = REF_timeint.bias(COM_timeint).convert(plot_unit) + cREF = Variable(name = "centralized %s" % name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, ndata = REF.ndata, + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, area = 
REF.area).convert(plot_unit) + REF_iav = cREF.rms() + if skip_rmse: del cREF + bias_score_map = Score(bias,REF_iav if REF.time.size > 1 else REF_timeint) + bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + if dataset is not None: + bias.name = "bias_map_of_%s" % name + bias.toNetCDF4(dataset,group="MeanState") + bias_score_map.name = "biasscore_map_of_%s" % name + bias_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) + bias_val.name = "Bias %s" % region + bias_val.toNetCDF4(dataset,group="MeanState") + bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + bias_score.name = "Bias Score %s" % region + bias_score.toNetCDF4(dataset,group="MeanState") + del bias,bias_score_map + + # Spatial mean: plots + if REF.time.size > 1: + if benchmark_dataset is not None: + for region in regions: + ref_spaceint = REF.integrateInSpace(region=region,mean=True) + ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + for region in regions: + com_spaceint = COM.integrateInSpace(region=region,mean=True) + com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + com_spaceint.toNetCDF4(dataset,group="MeanState") + + # RMSE: maps, scalars, and scores + if not skip_rmse: + rmse = REF.rmse(COM).convert(plot_unit) + del REF + cCOM = Variable(name = "centralized %s" % name, unit = unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM.area, ndata = COM.ndata).convert(plot_unit) + del COM + crmse = cREF.rmse(cCOM).convert(plot_unit) + del cREF + if skip_iav: del cCOM + rmse_score_map = Score(crmse,REF_iav) + if dataset is not None: + rmse.name = "rmse_map_of_%s" % name + rmse.toNetCDF4(dataset,group="MeanState") + rmse_score_map.name = "rmsescore_map_of_%s" % name + rmse_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) + rmse_val.name = "RMSE %s" % region + rmse_val.toNetCDF4(dataset,group="MeanState") + rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + rmse_score.name = "RMSE Score %s" % region + rmse_score.toNetCDF4(dataset,group="MeanState") + del rmse,crmse,rmse_score_map + + # IAV: maps, scalars, scores + if not skip_iav: + COM_iav = cCOM.rms() + del cCOM + iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit, + data = (COM_iav.data-REF_iav.data), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = area, ndata = ndata), + REF_iav) + if benchmark_dataset is not None: + REF_iav.name = "iav_map_of_%s" % name + REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + COM_iav.name = "iav_map_of_%s" % name + COM_iav.toNetCDF4(dataset,group="MeanState") + iav_score_map.name = "iavscore_map_of_%s" % name + iav_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + iav_score.name = "Interannual Variability Score %s" % region + iav_score.toNetCDF4(dataset,group="MeanState") + del COM_iav,iav_score_map + del REF_iav + + return + +def ClipTime(v,t0,tf): + """Remove 
time from a variable based on input bounds. + + Parameters + ---------- + v : ILAMB.Variable.Variable + the variable to trim + t0,tf : float + the times at which to trim + + Returns + ------- + vtrim : ILAMB.Variable.Variable + the trimmed variable + """ + begin = np.argmin(np.abs(v.time_bnds[:,0]-t0)) + end = np.argmin(np.abs(v.time_bnds[:,1]-tf)) + while v.time_bnds[begin,0] > t0: + begin -= 1 + if begin <= 0: + begin = 0 + break + while v.time_bnds[end, 1] < tf: + end += 1 + if end >= v.time.size-1: + end = v.time.size-1 + break + v.time = v.time [begin:(end+1) ] + v.time_bnds = v.time_bnds[begin:(end+1),...] + v.data = v.data [begin:(end+1),...] + return v + +def MakeComparable(ref,com,**keywords): + r"""Make two variables comparable. + + Given a reference variable and a comparison variable, make the two + variables comparable or raise an exception explaining why they are + not. + + Parameters + ---------- + ref : ILAMB.Variable.Variable + the reference variable object + com : ILAMB.Variable.Variable + the comparison variable object + clip_ref : bool, optional + enable in order to clip the reference variable time using the + limits of the comparison variable (defult is False) + mask_ref : bool, optional + enable in order to mask the reference variable using an + interpolation of the comparison variable (defult is False) + eps : float, optional + used to determine how close you can be to a specific time + (expressed in days since 1-1-1850) and still be considered the + same time (default is 30 minutes) + window : float, optional + specify to extend the averaging intervals (in days since + 1-1-1850) when a variable must be coarsened (default is 0) + + Returns + ------- + ref : ILAMB.Variable.Variable + the modified reference variable object + com : ILAMB.Variable.Variable + the modified comparison variable object + + """ + # Process keywords + clip_ref = keywords.get("clip_ref" ,False) + mask_ref = keywords.get("mask_ref" ,False) + eps = keywords.get("eps" ,30./60./24.) + window = keywords.get("window" ,0.) + extents = keywords.get("extents" ,np.asarray([[-90.,+90.],[-180.,+180.]])) + logstring = keywords.get("logstring","") + + # If one variable is temporal, then they both must be + if ref.temporal != com.temporal: + msg = "%s Datasets are not uniformly temporal: " % logstring + msg += "reference = %s, comparison = %s" % (ref.temporal,com.temporal) + logger.debug(msg) + raise VarsNotComparable() + + # If the reference is spatial, the comparison must be + if ref.spatial and not com.spatial: + ref = ref.extractDatasites(com.lat,com.lon) + msg = "%s The reference dataset is spatial but the comparison is site-based. " % logstring + msg += "Extracted %s sites from the reference to match the comparison." 
% ref.ndata + logger.info(msg) + + # If the reference is layered, the comparison must be + if ref.layered and not com.layered: + if ref.depth.size == 1: + com.layered = True + com.depth = ref.depth + com.depth_bnds = ref.depth_bnds + shp = list(com.data.shape) + insert = 0 + if com.temporal: insert = 1 + shp.insert(insert,1) + com.data = com.data.reshape(shp) + else: + msg = "%s Datasets are not uniformly layered: " % logstring + msg += "reference = %s, comparison = %s" % (ref.layered,com.layered) + logger.debug(msg) + raise NotLayeredVariable() + + # If the reference represents observation sites, extract them from + # the comparison + if ref.ndata is not None and com.spatial: com = com.extractDatasites(ref.lat,ref.lon) + + # If both variables represent observations sites, make sure you + # have the same number of sites and that they represent the same + # location. Note this is after the above extraction so at this + # point the ndata field of both variables should be equal. + if ref.ndata != com.ndata: + msg = "%s One or both datasets are understood as site data but differ in number of sites: " % logstring + msg += "reference = %d, comparison = %d" % (ref.ndata,com.ndata) + logger.debug(msg) + raise VarsNotComparable() + if ref.ndata is not None: + if not (np.allclose(ref.lat,com.lat) or np.allclose(ref.lon,com.lon)): + msg = "%s Datasets represent sites, but the locations are different: " % logstring + msg += "maximum difference lat = %.f, lon = %.f" % (np.abs((ref.lat-com.lat)).max(), + np.abs((ref.lon-com.lon)).max()) + logger.debug(msg) + raise VarsNotComparable() + + # If the datasets are both spatial, ensure that both represent the + # same spatial area and trim the datasets if not. + if ref.spatial and com.spatial: + + lat_bnds = (max(ref.lat_bnds[ 0,0],com.lat_bnds[ 0,0],extents[0,0]), + min(ref.lat_bnds[-1,1],com.lat_bnds[-1,1],extents[0,1])) + lon_bnds = (max(ref.lon_bnds[ 0,0],com.lon_bnds[ 0,0],extents[1,0]), + min(ref.lon_bnds[-1,1],com.lon_bnds[-1,1],extents[1,1])) + + # Clip reference + diff = np.abs([ref.lat_bnds[[0,-1],[0,1]]-lat_bnds, + ref.lon_bnds[[0,-1],[0,1]]-lon_bnds]) + if diff.sum() >= 5.: + shp0 = np.asarray(np.copy(ref.data.shape),dtype=int) + ref.trim(lat=lat_bnds,lon=lon_bnds) + shp = np.asarray(np.copy(ref.data.shape),dtype=int) + msg = "%s Spatial data was clipped from the reference: " % logstring + msg += " before: %s" % (shp0) + msg += " after: %s" % (shp ) + logger.info(msg) + + # Clip comparison + diff = np.abs([com.lat_bnds[[0,-1],[0,1]]-lat_bnds, + com.lon_bnds[[0,-1],[0,1]]-lon_bnds]) + if diff.sum() >= 5.: + shp0 = np.asarray(np.copy(com.data.shape),dtype=int) + com.trim(lat=lat_bnds,lon=lon_bnds) + shp = np.asarray(np.copy(com.data.shape),dtype=int) + msg = "%s Spatial data was clipped from the comparison: " % logstring + msg += " before: %s" % (shp0) + msg += " after: %s" % (shp ) + logger.info(msg) + + + if ref.temporal: + + # If the reference time scale is significantly larger than the + # comparison, coarsen the comparison + if np.log10(ref.dt/com.dt) > 0.5: + com = com.coarsenInTime(ref.time_bnds,window=window) + + # Time bounds of the reference dataset + t0 = ref.time_bnds[ 0,0] + tf = ref.time_bnds[-1,1] + + # Find the comparison time range which fully encompasses the reference + com = ClipTime(com,t0,tf) + + if clip_ref: + + # We will clip the reference dataset too + t0 = max(t0,com.time_bnds[ 0,0]) + tf = min(tf,com.time_bnds[-1,1]) + ref = ClipTime(ref,t0,tf) + + else: + + # The comparison dataset needs to fully cover the reference in 
time + if (com.time_bnds[ 0,0] > (t0+eps) or + com.time_bnds[-1,1] < (tf-eps)): + msg = "%s Comparison dataset does not cover the time frame of the reference: " % logstring + msg += " t0: %.16e <= %.16e (%s)" % (com.time_bnds[0, 0],t0+eps,com.time_bnds[0, 0] <= (t0+eps)) + msg += " tf: %.16e >= %.16e (%s)" % (com.time_bnds[1,-1],tf-eps,com.time_bnds[1,-1] >= (tf-eps)) + logger.debug(msg) + raise VarsNotComparable() + + # Check that we now are on the same time intervals + if ref.time.size != com.time.size: + msg = "%s Datasets have differing numbers of time intervals: " % logstring + msg += "reference = %d, comparison = %d" % (ref.time.size,com.time.size) + logger.debug(msg) + raise VarsNotComparable() + if not np.allclose(ref.time_bnds,com.time_bnds,atol=0.75*ref.dt): + msg = "%s Datasets are defined at different times" % logstring + logger.debug(msg) + raise VarsNotComparable() + + if ref.layered: + + # Try to resolve if the layers from the two quantities are + # different + if ref.depth.size == com.depth.size == 1: + ref = ref.integrateInDepth(mean = True) + com = com.integrateInDepth(mean = True) + elif ref.depth.size != com.depth.size: + # Compute the mean values from the comparison over the + # layer breaks of the reference. + if ref.depth.size == 1 and com.depth.size > 1: + com = com.integrateInDepth(z0=ref.depth_bnds[ 0,0], + zf=ref.depth_bnds[-1,1], + mean = True) + ref = ref.integrateInDepth(mean = True) # just removing the depth dimension + else: + if not np.allclose(ref.depth,com.depth): + msg = "%s Datasets have a different layering scheme" % logstring + logger.debug(msg) + raise VarsNotComparable() + + # Convert the comparison to the units of the reference + com = com.convert(ref.unit) + + return ref,com + + +def CombineVariables(V): + """Combines a list of variables into a single variable. + + This routine is intended to be used to merge variables when + separate moments in time are scattered over several files. + + Parameters + ---------- + V : list of ILAMB.Variable.Variable + a list of variables to merge into a single variable + + Returns + ------- + v : ILAMB.Variable.Variable + the merged variable + """ + from Variable import Variable + + # checks on data + assert type(V) == type([]) + for v in V: assert v.temporal + if len(V) == 1: return V[0] + + # Put list in order by initial time + V.sort(key=lambda v: v.time[0]) + + # Check the beginning and ends times for monotonicity + nV = len(V) + t0 = np.zeros(nV) + tf = np.zeros(nV) + nt = np.zeros(nV,dtype=int) + ind = [0] + for i,v in enumerate(V): + t0[i] = v.time[ 0] + tf[i] = v.time[-1] + nt[i] = v.time.size + ind.append(nt[:(i+1)].sum()) + + # Checks on monotonicity + assert (t0[1:]-t0[:-1]).min() >= 0 + assert (tf[1:]-tf[:-1]).min() >= 0 + assert (t0[1:]-tf[:-1]).min() >= 0 + + # Assemble the data + shp = (nt.sum(),)+V[0].data.shape[1:] + time = np.zeros(shp[0]) + time_bnds = np.zeros((shp[0],2)) + data = np.zeros(shp) + mask = np.zeros(shp,dtype=bool) + for i,v in enumerate(V): + time [ind[i]:ind[i+1]] = v.time + time_bnds[ind[i]:ind[i+1],...] = v.time_bnds + data [ind[i]:ind[i+1],...] = v.data + mask [ind[i]:ind[i+1],...] = v.data.mask + + # If assembled from single slice files and no time bounds were + # provided, they will not be reflective of true bounds here. If + # any dt's are 0, make time_bounds none and recompute in the + # constructor. 
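    # (assumption, added for clarity and not taken from the upstream comment: a history
    #  file holding a single time slice with no explicit bounds typically ends up with
    #  zero-width intervals [t, t], which is exactly the condition the width test below
    #  detects before discarding the bounds so the Variable constructor can rebuild them
    #  from the merged time axis.)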
+ if np.any((time_bnds[:,1]-time_bnds[:,0])<1e-12): time_bnds = None + + v = V[0] + return Variable(data = np.ma.masked_array(data,mask=mask), + unit = v.unit, + name = v.name, + time = time, + time_bnds = time_bnds, + depth = v.depth, + depth_bnds = v.depth_bnds, + lat = v.lat, + lon = v.lon, + area = v.area, + ndata = v.ndata) + +def ConvertBoundsTypes(x): + y = None + if x.ndim == 2: + y = np.zeros(x.shape[0]+1) + y[:-1] = x[ :, 0] + y[ -1] = x[-1,-1] + if x.ndim == 1: + y = np.zeros((x.shape[0]-1,2)) + y[:,0] = x[:-1] + y[:,1] = x[+1:] + return y + +def LandLinInterMissingValues(mdata): + land = np.any(mdata.mask,axis=0)==False + data = np.ma.masked_array(mdata) + data.data[data.mask] = 0. + data.fill_value = 0. + data = data.data + land = land.astype(int) + smooth = data*land[np.newaxis,...] + suml = np.copy(land) + smooth[:,1:-1,1:-1] += data[:, :-2, :-2]*land[np.newaxis, :-2, :-2] + suml [ 1:-1,1:-1] += land[ :-2, :-2] + smooth[:,1:-1,1:-1] += data[:, :-2,1:-1]*land[np.newaxis, :-2,1:-1] + suml [ 1:-1,1:-1] += land[ :-2,1:-1] + smooth[:,1:-1,1:-1] += data[:, :-2, +2:]*land[np.newaxis, :-2, +2:] + suml [ 1:-1,1:-1] += land[ :-2, +2:] + smooth[:,1:-1,1:-1] += data[:,1:-1, :-2]*land[np.newaxis,1:-1, :-2] + suml [ 1:-1,1:-1] += land[ 1:-1, :-2] + smooth[:,1:-1,1:-1] += data[:,1:-1, +2:]*land[np.newaxis,1:-1, +2:] + suml [ 1:-1,1:-1] += land[ 1:-1, +2:] + smooth[:,1:-1,1:-1] += data[:, +2:, :-2]*land[np.newaxis, +2:, :-2] + suml [ 1:-1,1:-1] += land[ +2:, :-2] + smooth[:,1:-1,1:-1] += data[:, +2:,1:-1]*land[np.newaxis, +2:,1:-1] + suml [ 1:-1,1:-1] += land[ +2:,1:-1] + smooth[:,1:-1,1:-1] += data[:, +2:, +2:]*land[np.newaxis, +2:, +2:] + suml [ 1:-1,1:-1] += land[ +2:, +2:] + smooth /= suml.clip(1) + smooth = (mdata.mask==True)*smooth + (mdata.mask==False)*mdata.data + return smooth diff --git a/test/makefile b/test/makefile new file mode 100644 index 00000000..a777a097 --- /dev/null +++ b/test/makefile @@ -0,0 +1,15 @@ +NP = 1 + +test_sample: + @mpirun -n ${NP} ilamb-run \ + --config test.cfg \ + --model_root ${ILAMB_ROOT}/MODELS/CLM/ \ + --models CLM50r243CRUNCEP CLM50r243GSWP3 \ + --regions global shsa + python score_diff.py scores_test.csv.gold _build/scores.csv + +test_run_script: + nosetests test_run_script.py + +clean: + @rm -rf _build diff --git a/test/score_diff.py b/test/score_diff.py new file mode 100644 index 00000000..7ec0c82a --- /dev/null +++ b/test/score_diff.py @@ -0,0 +1,18 @@ +import numpy as np +import sys +if len(sys.argv) != 3: + print "usage: python score_diff.py scores1.csv scores2.csv" + sys.exit(1) +gold = np.recfromcsv(sys.argv[1]) +test = np.recfromcsv(sys.argv[2]) +assert gold.dtype == test.dtype +ok = True +for model in gold.dtype.names[1:]: + if not np.allclose(test[model],gold[model]): + ok = False + diff = np.abs(test[model]-gold[model])/gold[model] + for i in range(diff.size): + if diff[i] > 1e-12: + print "%s | %s | %.6f%% " % (gold['variables'][i],model,diff[i]*100.) 
+if not ok: sys.exit(1) +print "Test passed" diff --git a/test/scores_test.csv.gold b/test/scores_test.csv.gold new file mode 100644 index 00000000..6fccfb93 --- /dev/null +++ b/test/scores_test.csv.gold @@ -0,0 +1,9 @@ +Variables,CLM50r243CRUNCEP,CLM50r243GSWP3 +Biomass,0.5957104653413856,0.6783045750117078 +Gross Primary Productivity,0.6217211297637607,0.6126273585798891 +Global Net Ecosystem Carbon Balance,0.7054000637266042,0.8636690794621101 +Net Ecosystem Exchange,0.3941918077804778,0.38120476926634617 +Terrestrial Water Storage Anomaly,0.7000653021257858,0.7269702240175762 +Albedo,0.5434663466148166,0.544587485316599 +Surface Air Temperature,0.9256731031865132,0.9314748385926337 +Precipitation,0.7555153501937276,0.7679655805094326 diff --git a/test/test.cfg b/test/test.cfg new file mode 100644 index 00000000..57446284 --- /dev/null +++ b/test/test.cfg @@ -0,0 +1,134 @@ + +[h1: Ecosystem and Carbon Cycle] +bgcolor = "#ECFFE6" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Biomass] +variable = "biomass" +alternate_vars = "cVeg" +weight = 5 +skip_rmse = True +mass_weighting = True + +[GlobalCarbon] +source = "DATA/biomass/GLOBAL.CARBON/biomass_0.5x0.5.nc" +weight = 16 +table_unit = "Pg" +plot_unit = "kg m-2" +space_mean = False + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Gross Primary Productivity] +variable = "gpp" +cmap = "Greens" +weight = 5 +mass_weighting = True + +[Fluxnet] +source = "DATA/gpp/FLUXNET/gpp.nc" +weight = 9 +table_unit = "g m-2 d-1" +plot_unit = "g m-2 d-1" + +[GBAF] +source = "DATA/gpp/GBAF/gpp_0.5x0.5.nc" +weight = 15 +table_unit = "Pg yr-1" +plot_unit = "g m-2 d-1" +space_mean = False +skip_iav = True +relationships = "Precipitation/GPCP2","SurfaceAirTemperature/CRU" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Global Net Ecosystem Carbon Balance] +variable = "nbp" +weight = 5 +ctype = "ConfNBP" + +[Hoffman] +source = "DATA/nbp/HOFFMAN/nbp_1850-2010.nc" +weight = 20 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Net Ecosystem Exchange] +variable = "nee" +derived = "gpp-ra-rh" +weight = 5 +mass_weighting = True + +[GBAF] +source = "DATA/nee/GBAF/nee_0.5x0.5.nc" +weight = 4 +table_unit = "Pg yr-1" +plot_unit = "g m-2 d-1" +space_mean = False +skip_iav = True + +########################################################################### + +[h1: Hydrology Cycle] +bgcolor = "#E6F9FF" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Terrestrial Water Storage Anomaly] +variable = "twsa" +alternate_vars = "tws" +cmap = "Blues" +weight = 5 +ctype = "ConfTWSA" + +[GRACE] +source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" +weight = 25 + +########################################################################### + +[h1: Radiation and Energy Cycle] +bgcolor = "#FFECE6" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Albedo] +variable = "albedo" +derived = "rsus/rsds" +weight = 1 + +[MODIS] +source = "DATA/albedo/MODIS/albedo_0.5x0.5.nc" +weight = 20 + +########################################################################### + +[h1: Forcings] +bgcolor = "#EDEDED" + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Surface Air Temperature] +variable = "tas" +weight = 2 + +[CRU] +source = "DATA/tas/CRU/tas_0.5x0.5.nc" +weight = 25 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + 
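# (annotation added for orientation; it is not required by the ILAMB config parser:
#  each "[h1: ...]" block above defines a top-level section of the generated HTML,
#  each "[h2: ...]" block a variable to be scored within it, and each bare "[Name]"
#  block one benchmark dataset selected via "source". The "weight" entries control
#  how dataset and variable scores are aggregated into the overall model score.)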
+[h2: Precipitation] +variable = "pr" +cmap = "Blues" +weight = 2 +mass_weighting = True + +[GPCP2] +source = "DATA/pr/GPCP2/pr_0.5x0.5.nc" +land = True +weight = 20 +table_unit = "mm d-1" +plot_unit = "mm d-1" +space_mean = True diff --git a/test/test_Variable.py b/test/test_Variable.py new file mode 100644 index 00000000..dd01d319 --- /dev/null +++ b/test/test_Variable.py @@ -0,0 +1,157 @@ +from ILAMB.Variable import Variable +import ILAMB.ilamblib as il +import numpy as np +import os + +def test_integrateInTime(variables): + head = "\n--- Testing integrateInTime() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + try: + vdict["timeint"] = var.integrateInTime() + vdict["timeint_mean"] = var.integrateInTime(mean=True) + print vdict["timeint"] + print vdict["timeint_mean"] + except il.NotTemporalVariable: + pass + +def test_integrateInSpace(variables): + head = "\n--- Testing integrateInSpace() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + try: + vdict["spaceint"] = var.integrateInSpace() + vdict["spaceint_mean"] = var.integrateInSpace(mean=True) + vdict["spaceint_amazon"] = var.integrateInSpace(region="amazon") + vdict["spaceint_amazon_mean"] = var.integrateInSpace(region="amazon",mean=True) + print vdict["spaceint"] + print vdict["spaceint_mean"] + print vdict["spaceint_amazon"] + print vdict["spaceint_amazon_mean"] + except il.NotSpatialVariable: + pass + +def test_annualCycle(variables): + head = "\n--- Testing annualCycle() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + try: + # note: not testing std, max, and min (assuming ok since functions are similar) + vdict["cycle"],junk,junk,junk = var.annualCycle() + print vdict["cycle"] + except il.NotTemporalVariable: + pass + +def test_timeOfExtrema(variables): + head = "\n--- Testing timeOfExtrema() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + try: + # note: not testing min time (assuming ok since functions are similar) + vdict["maxtime"] = var.timeOfExtrema() + print vdict["maxtime"] + except il.NotTemporalVariable: + pass + +def test_interpolate(variables): + head = "\n--- Testing interpolate() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + if var.temporal: + t = np.linspace(var.time.min(),var.time.max(),10) + vdict["interp1"] = var.interpolate(time=t) + if var.spatial: + lat = np.linspace(- 90, 90,30) + lon = np.linspace(-180,180,60) + vdict["interp2"] = var.interpolate(lat=lat,lon=lon) + vdict["interp3"] = var.interpolate(lat=lat) + vdict["interp4"] = var.interpolate(lon=lon) + if var.temporal and var.spatial: + vdict["interp5"] = var.interpolate(time=t,lat=lat,lon=lon) + vdict["interp6"] = var.interpolate(time=t,lat=lat) + vdict["interp7"] = var.interpolate(time=t,lon=lon) + for key in vdict.keys(): + if "interp" in key: print vdict[key] + +def test_phaseShift(variables): + head = "\n--- Testing phaseShift() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + try: + vdict["shift"] = var.phaseShift(var) + vdict["shift_min"] = var.phaseShift(var,method="min_of_annual_cycle") + print vdict["shift"] + print vdict["shift_min"] + if vdict.has_key("cycle"): + vdict["shift_fast"] = vdict["cycle"].phaseShift(vdict["cycle"]) + print vdict["shift_fast"] + except il.NotTemporalVariable: + pass + +def test_correlation(variables): + head = "\n--- Testing 
correlation() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + try: + if var.spatial or var.ndata: + vdict["corr_spatial"] = var.correlation(var,"spatial") + print vdict["corr_spatial"] + if var.temporal: + vdict["corr_temporal"] = var.correlation(var,"temporal") + print vdict["corr_temporal"] + if var.spatial and var.temporal: + vdict["corr_both"] = var.correlation(var,"spatiotemporal") + print vdict["corr_both"] + + except il.NotTemporalVariable: + pass + +def test_bias(variables): + head = "\n--- Testing bias() " + print "%s%s\n" % (head,"-"*(120-len(head))) + for vdict in variables: + var = vdict["var"] + try: + vdict["bias"] = var.bias(var) + print vdict["bias"] + except il.NotSpatialVariable: + pass + +# Setup different types of variables +gpp = {} +gpp["var"] = Variable(filename = os.environ["ILAMB_ROOT"]+"/DATA/gpp/FLUXNET-MTE/derived/gpp.nc", + variable_name = "gpp") +le = {} +le["var"] = Variable(filename = os.environ["ILAMB_ROOT"]+"/DATA/le/FLUXNET/derived/le.nc", + variable_name = "le") +co2 = {} +co2["var"] = Variable(filename = os.environ["ILAMB_ROOT"]+"/DATA/co2/MAUNA.LOA/derived/co2_1959-2013.nc", + variable_name = "co2") +pi = {} +pi["var"] = Variable(data = np.pi, + unit = "-", + name = "pi") + +variables = [gpp,le,co2,pi] + +head = "\n--- Found the following variables for testing " +print "%s%s\n" % (head,"-"*(120-len(head))) +for vdict in variables: + print vdict["var"] + +test_integrateInTime(variables) +test_integrateInSpace(variables) +test_annualCycle(variables) +test_timeOfExtrema(variables) +test_interpolate(variables) +test_phaseShift(variables) +test_correlation(variables) +test_bias(variables) + diff --git a/test/test_run_script.py b/test/test_run_script.py new file mode 100644 index 00000000..46bd6bc8 --- /dev/null +++ b/test/test_run_script.py @@ -0,0 +1,17 @@ +"""Nosetests for the ILAMB run script.""" +import os +import subprocess +from nose.tools import assert_equal, raises + + +run_cmd = 'ilamb-run' + + +def test_help_argument(): + r = subprocess.call([run_cmd, '--help']) + assert_equal(r, 0) + + +@raises(subprocess.CalledProcessError) +def test_config_argument_not_set(): + r = subprocess.check_call([run_cmd]) From 75346e664b1007cc9ceb2b9e574589c38acd474c Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Thu, 16 Aug 2018 16:58:27 -0600 Subject: [PATCH 18/22] add ilamb/Makefile after updating subtree to ILAMB v2.3 --- ilamb/Makefile | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 ilamb/Makefile diff --git a/ilamb/Makefile b/ilamb/Makefile new file mode 100644 index 00000000..9ec599b3 --- /dev/null +++ b/ilamb/Makefile @@ -0,0 +1,35 @@ +all : develop + +test : FORCE + cd ilamb/test; python -c "import ILAMB; print ILAMB.__version__" + +develop : install + +# NOTE: need to cd to ilamb dir first before setup because +# the Makefile to build and install in the virtualenv is +# different from the distibution Makefile. + +install : FORCE + cd ilamb; python setup.py install + +# +clean : + -rm -f *~ *.CKP *.ln *.BAK *.bak .*.bak \ + core errs \ + ,* .emacs_* \ + tags TAGS \ + make.log MakeOut \ + *.tmp tmp.txt + +# +# clobber - Really clean up the directory. +# +clobber : clean + -rm -f .Makedepend *.o *.mod *.il *.pyc + -rm -rf ilamb/*.egg-info + -rm -rf ilamb/build + +# +# FORCE - Null rule to force things to happen. 
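# (FORCE has no prerequisites and no recipe, so any target that lists it as a
#  dependency is always considered out of date and re-runs.)
#
# Illustrative use of this Makefile, run from the directory that holds it and
# assuming the CESM_postprocessing virtualenv is already activated (a sketch,
# not part of the original file):
#    make install
#    make test
#    make clobber    # removes build/ and *.egg-info left behind by install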
+# +FORCE : From 0626b8863adffa33c9cccfaa74a22aff9511fa45 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Tue, 4 Sep 2018 10:50:50 -0600 Subject: [PATCH 19/22] update machine libraries for za compilation; add --add-dav option to create_postprocess to accept a CLA with a fully qualified path to the DAV CESM postprocessing root directory --- Machines/machine_postprocess.xml | 4 +-- cesm_utils/cesm_utils/create_postprocess | 34 +++++++++++++++--------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/Machines/machine_postprocess.xml b/Machines/machine_postprocess.xml index 13c2c282..7a44e200 100644 --- a/Machines/machine_postprocess.xml +++ b/Machines/machine_postprocess.xml @@ -133,8 +133,8 @@ ifort -c -g -O2 - -I/glade/u/apps/ch/opt/netcdf/4.4.1.1/intel/16.0.3/include - -L/glade/u/apps/ch/opt/netcdf/4.4.1.1/intel/16.0.3/lib -lnetcdff -lnetcdf + -I/glade/u/apps/ch/opt/netcdf/4.6.1/intel/17.0.1/include + -L/glade/u/apps/ch/opt/netcdf/4.6.1/intel/17.0.1/lib -lnetcdff -lnetcdf module purge diff --git a/cesm_utils/cesm_utils/create_postprocess b/cesm_utils/cesm_utils/create_postprocess index a62084b8..4ba8749a 100755 --- a/cesm_utils/cesm_utils/create_postprocess +++ b/cesm_utils/cesm_utils/create_postprocess @@ -58,7 +58,7 @@ try: except KeyError: err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH environment variable.' \ ' For example on cheyenne: setenv POSTPROCESS_PATH /glade/p/cesm/postprocessing_ch' \ - ' In addition, for DAV support: setenv POSTPROCESS_PATH_DAV /glade/p/cesm/postprocessing_dav') + ' In addition, for NCAR DAV support use the --add-dav command line option.') raise OSError(err_msg) cesm_pp_path = os.environ["POSTPROCESS_PATH"] @@ -130,6 +130,18 @@ def commandline_options(): parser.add_argument('-username', '--username', nargs=1, required=False, help='User name (optional). Defaults to user login name.') + parser.add_argument('-add-dav', '--add-dav', dest='add_dav', nargs=1, required=False, + help='Fully qualified path to the root of the CESM postprocessing ' \ + 'virtualenv for the NCAR DAV cluster. This option sets the XML ' \ + 'variable POSTPROCESS_PATH_DAV in env_postprocess.xml and ' \ + 'creates all the necessary postprocessing batch scripts for the ' \ + 'NCAR DAV Slurm manager. This option is only available when create_postprocess ' \ + 'is run on NCAR machine cheyenne. A set of batch submission scripts in the ' \ + 'postprocessing caseroot with the "_dav" extension are included along side '\ + 'the cheyenne PBS submission scripts. '\ + 'Example: /glade/p/cesm/postprocessing_dav. '\ + 'Defaults to "undefined". (optional)') + options = parser.parse_args() return options @@ -577,6 +589,13 @@ def initialize_main(envDict, options, standalone): if options.username: envDict['USER_NAME'] = options.username[0] + # set the POSTPROCESS_PATH_DAV if option add-dav is specified + envDict['POSTPROCESS_PATH_DAV'] = 'undefined' + if options.add_dav: + # check to make sure virtualenv exists + if os.path.isfile('{0}/cesm-env2/bin/activate_this.py'.format(options.add_dav[0])): + envDict['POSTPROCESS_PATH_DAV'] = options.add_dav[0] + return envDict # ------------------------------------------------------------------------------- @@ -620,15 +639,6 @@ def main(options): if not envDict['MACH']: raise OSError('create_postprocess ERROR: hostname "{0}" is not currently supported. 
Exiting...'.format(hostname)) - # check if env POSTPROCESS_PATH_DAV needs to be set - if (envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'dav'): - try: - envDict["POSTPROCESS_PATH_DAV"] = os.environ["POSTPROCESS_PATH_DAV"] - except KeyError: - err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH_DAV environment variable.' \ - ' For example, setenv POSTPROCESS_PATH_DAV /glade/p/cesm/postprocessing_dav') - raise OSError(err_msg) - # make the appropriate dirs in the caseroot try: os.mkdir(pp_case_path) @@ -888,10 +898,10 @@ def main(options): imb_options='{{ imb_options }}') # check if machine is cheyenne then create a set of dav submission scripts - if envDict['MACH'] == 'cheyenne': + if envDict['MACH'] == 'cheyenne' and envDict['POSTPROCESS_PATH_DAV'] != 'undefined': hostname = 'dav' envDict['MACH'] = cesmEnvLib.get_machine_name(hostname, '{0}/Machines/machine_postprocess.xml'.format(envDict['POSTPROCESS_PATH'])) - pp_dav = os.environ["POSTPROCESS_PATH_DAV"] + pp_dav = envDict["POSTPROCESS_PATH_DAV"] # get the machine dependent variables, modules and mpi run command in a dictionary machine = dict() From 90aba9a763e0aa3a917f1614097376f54573951b Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Fri, 7 Sep 2018 12:01:57 -0600 Subject: [PATCH 20/22] add optional CLA --use-ssh-key to error out if passwordless ssh key access is not available instead of just printing and error message (default); add a serial test for ice averages to test the new PyNIO v1.5.3 behavior of closing a Nio file upon exiting a function scope --- Tools/copy_html | 23 ++++++++++--- averager/pp_tests/serial_ice_slice.py | 48 +++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 5 deletions(-) create mode 100755 averager/pp_tests/serial_ice_slice.py diff --git a/Tools/copy_html b/Tools/copy_html index 35660681..16768657 100755 --- a/Tools/copy_html +++ b/Tools/copy_html @@ -112,6 +112,15 @@ def commandline_options(): parser.add_argument('-debug', '--debug', nargs=1, required=False, type=int, default=0, help='debugging verbosity level output: 0 = none, 1 = minimum, 2 = maximum. 0 is default') + parser.add_argument('--use-ssh-key', dest='use_ssh_key', action='store_true', + help='Use a ssh key to connect to the remove web host defined by '\ + 'XML variables "GLOBAL_WEBHOST" and "GLOBAL_WEBLOGIN". '\ + 'If a ssh key is not present, then this option will cause '\ + 'execution to stop as opposed to issuing a warning '\ + 'and prompting for a password multiple times. 
'\ + 'More details about how to create ssh keys is available at '\ + '"http://tools.cgd.ucar.edu/make_user_ssh_keys/index.html"') + options = parser.parse_args() return options @@ -197,7 +206,7 @@ def get_years(env, comp): #======================================================================= # check_ssh_key #======================================================================= -def check_ssh_key(env): +def check_ssh_key(env, use_ssh_key): # check if ssh key is set for passwordless access to the web host try: @@ -205,8 +214,12 @@ def check_ssh_key(env): stderr=subprocess.STDOUT, shell=True) except subprocess.CalledProcessError as e: - print('WARNING: unable to connect to remote web host {0}@{1} without a password'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'])) - print(' You will be prompted for a password multiple times in order to copy the files.') + if use_ssh_key: + print('ERROR: unable to connect to remote web host {0}@{1} without a password'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'])) + sys.exit(1) + else: + print('WARNING: unable to connect to remote web host {0}@{1} without a password'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'])) + print(' You will be prompted for a password multiple times in order to copy the files.') #======================================================================= @@ -559,8 +572,8 @@ def main(options): # load the env with all the env file entries env = cesmEnvLib.readXML(pp_caseroot, envFileList) - # check if sshkey is set - check_ssh_key(env) + # check if ssh key is set + check_ssh_key(env, options.use_ssh_key) # copy the different diag component web files for comp in compList: diff --git a/averager/pp_tests/serial_ice_slice.py b/averager/pp_tests/serial_ice_slice.py new file mode 100755 index 00000000..946ddf60 --- /dev/null +++ b/averager/pp_tests/serial_ice_slice.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +from pyaverager import PyAverager, specification, PreProc +import os + +#### User modify #### + +in_dir='/glade/scratch/aliceb/b.e21.B1850.f09_g17.CMIP6-piControl.001/ice/hist/' +out_dir= '/glade/scratch/aliceb/b.e21.B1850.f09_g17.CMIP6-piControl.001/ice/slice/' +pref= 'b.e21.B1850.f09_g17.CMIP6-piControl.001.cice.h' +htype= 'slice' +average= ['ya:355','jfm:351:355'] +wght= False +ncfrmt = 'netcdf' +serial=True + +suffix= 'nc' +date_pattern= 'yyyymm-yyyymm' +clobber = True + +ice_obs_file = '/glade/p/cesm/omwg/grids/gx1v7_grid.nc' +reg_file ='/glade/p/cesm/pcwg/ice/data/REGION_MASK_gx1v7.nc' +year0 = 351 +year1 = 355 +ncl_location = '/glade/work/aliceb/sandboxes/dev/postprocessing_new/ice_diag//code/' + +#### End user modify #### + +pyAveSpecifier = specification.create_specifier(in_directory=in_dir, + out_directory=out_dir, + prefix=pref, + suffix=suffix, + date_pattern=date_pattern, + hist_type=htype, + avg_list=average, + weighted=wght, + ncformat=ncfrmt, + serial=serial, + ice_obs_file=ice_obs_file, + reg_file=reg_file, + year0=year0, + year1=year1, + clobber=clobber, + ncl_location=ncl_location) + +PreProc.run_pre_proc(pyAveSpecifier) +PyAverager.run_pyAverager(pyAveSpecifier) + From 90019fbcbadc668297444c80b5995b53c707ea09 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Fri, 7 Sep 2018 12:04:52 -0600 Subject: [PATCH 21/22] Squashed 'averager/pyAverager/' changes from 018f31f..691fcce 691fcce Merge pull request #46 from NCAR/devel f66b214 Increase version number f6ec1c9 Merge pull request #45 from bertinia/devel ac0109c Merge branch 'devel' of https://github.com/NCAR/pyAverager into devel b9c77d3 remove the 
read_obs function from PreProc.py and move the functionality in-line with the create_pre_proc to avoid a new "feature" with PyNIO v1.5.3 that closes files when leaving a function scope; tested in both serial and parallel mode. git-subtree-dir: averager/pyAverager git-subtree-split: 691fcce24f31810a27ec20d04e569ad099225bba --- README | 4 ++-- pyaverager/PreProc.py | 36 +++++++----------------------------- pyaverager/__init__.py | 2 +- setup.py | 2 +- 4 files changed, 11 insertions(+), 33 deletions(-) diff --git a/README b/README index 8f26f3ce..0667e498 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -February 14, 2018 +September 6, 2018 ====================================== @@ -9,7 +9,7 @@ PyAverager A package used for computing averages from climate model output. Authors: Sheri Mickelson, Kevin Paul, and John Dennis -Version: 0.9.15 +Version: 0.9.16 Copyright: Contained within LICENSE.txt Comments and feedback: mickelso@ucar.edu diff --git a/pyaverager/PreProc.py b/pyaverager/PreProc.py index 4bae22b6..e3f6435d 100644 --- a/pyaverager/PreProc.py +++ b/pyaverager/PreProc.py @@ -18,34 +18,6 @@ def __init__(self,spec): self.create_pre_proc(spec) - def read_obs(self,obs_file,tarea,tlong,tlat): - - ''' - Read in the ice observation file to get area, lat, and lon values. - - @param obs_file The observation file to pull values from. - - @param tarea The variable name for tarea. - - @param tlong The variable name for tlong. - - @param tlat The vaiable name for tlat. - - @return lat A pointer to lat. - - @return lon A pointer to lon. - - @reutrn area The values for area. - - ''' - - file_hndl = Nio.open_file(obs_file,'r') - lat = file_hndl.variables[tlat] - lon = file_hndl.variables[tlong] - area = file_hndl.variables[tarea] - area = area[:]*1.0e-4 - return lat,lon,area - def read_reg_mask(self,reg_file,reg_name): ''' @@ -197,7 +169,13 @@ def create_pre_proc(self,spec): tarea = 'TAREA' tlong = 'TLONG' tlat = 'TLAT' - o_lat,o_lon,o_area = self.read_obs(obs_file,tarea,tlong,tlat) + + # Read in the ice observation file to get area, lat, and lon values. 
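        # (added note, based on the commit message for this change: the read_obs()
        #  helper that used to wrap the lines below was removed because PyNIO 1.5.3
        #  closes a Nio file object once the function that opened it returns, which
        #  would leave the returned lat/lon variable references unusable; opening the
        #  observation file in-line keeps the handle alive for the rest of
        #  create_pre_proc.)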
+ obs_file_hndl = Nio.open_file(obs_file,'r') + o_lat = obs_file_hndl.variables[tlat] + o_lon = obs_file_hndl.variables[tlong] + o_area = obs_file_hndl.variables[tarea] + o_area = o_area[:]*1.0e-4 # If using time series files, open the variable's file now if (spec.hist_type == 'series'): diff --git a/pyaverager/__init__.py b/pyaverager/__init__.py index 186989d7..a484b342 100644 --- a/pyaverager/__init__.py +++ b/pyaverager/__init__.py @@ -2,5 +2,5 @@ import PyAverager, specification, PreProc -__version__ = "0.9.15" +__version__ = "0.9.16" diff --git a/setup.py b/setup.py index 19ed3616..81cc573e 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup setup(name='PyAverager', - version='0.9.15', + version='0.9.16', description='Parallel Python Averager for Climate Data', author='Sheri Mickelson', author_email='mickelso@ucar.edu', From 20227fcf5289ffe353fe93291de8a0766a59abd2 Mon Sep 17 00:00:00 2001 From: Alice Bertini Date: Sun, 9 Sep 2018 20:42:31 -0600 Subject: [PATCH 22/22] add capability to copy_html to handle multiple diag sets for each component; remove the create_index functionality in favor of simplicity and support of multiple diag sets for a given component --- README.md | 2 +- Tools/copy_html | 285 ++++++++++-------------------------------------- 2 files changed, 59 insertions(+), 228 deletions(-) diff --git a/README.md b/README.md index 55adf1b5..928d9f32 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ The input data sets required by this code are separate from this repository. Ins for accessing these data sets will be coming soon. For NCAR users, the data sets are already loaded into a central location on glade and do not need to be downloaded. -The NCAR cheyenne and geyser quick start guide along with other documentation is available at: +The NCAR cheyenne and DAV quick start guide along with other documentation is available at: http://github.com/NCAR/CESM_postprocessing/wiki/ diff --git a/Tools/copy_html b/Tools/copy_html index 16768657..d292b0a9 100755 --- a/Tools/copy_html +++ b/Tools/copy_html @@ -17,15 +17,9 @@ if sys.hexversion < 0x02070000: # built-in modules # import argparse -import collections -import datetime import errno import glob import os -import platform -import pprint -import re -import shutil import subprocess import traceback @@ -55,9 +49,6 @@ if hasattr(sys, 'real_prefix'): try: import cesm_utils except: - # - # activate the virtual environment that was created by create_python_env.sh - # activate_file = '{0}/cesm-env2/bin/activate_this.py'.format(postprocess_path) if not os.path.isfile(activate_file): err_msg = ('copy_html ERROR: the virtual environment in {0} does not exist.'.format(postprocess_path) \ @@ -69,9 +60,6 @@ if hasattr(sys, 'real_prefix'): except: raise OSError('copy_html ERROR: Unable to activate python virtualenv {0}'.format(activate_file)) else: - # - # activate the virtual environment that was created by create_python_env.sh - # activate_file = '{0}/cesm-env2/bin/activate_this.py'.format(postprocess_path) if not os.path.isfile(activate_file): err_msg = ('copy_html ERROR: the virtual environment in {0} does not exist.'.format(postprocess_path) \ @@ -88,9 +76,6 @@ if sys.version_info[0] == 2: else: from configparser import ConfigParser as config_parser -# -# import modules installed in the virtual environment -# from cesm_utils import cesmEnvLib import jinja2 @@ -229,33 +214,23 @@ def create_top_level(env, comp): # make sure top level remote directory exists try: - pipe = subprocess.Popen( ["ssh 
{0}@{1} 'mkdir -p {2}/{3}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)], env=env, shell=True) + pipe = subprocess.Popen( ["ssh {0}@{1} 'mkdir -p {2}/{3}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)], shell=True) pipe.wait() - except OSEerror as e: + except Exception as e: print('ERROR: unable to create remote directory {0}@{1}:{2}/{3}'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) print(' {0} - {1}'.format(e.errno, e.strerror)) sys.exit(1) - # create the logos subdir - try: - pipe = subprocess.Popen( ["ssh {0}@{1} 'mkdir -p {2}/logos'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'])], env=env, shell=True) - pipe.wait() - except OSEerror as e: - print('ERROR: unable to create remote directory {0}@{1}:{2}/logos'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'])) - print(' {0} - {1}'.format(e.errno, e.strerror)) - sys.exit(1) - - #======================================================================= # scp_files - scp files to a remote server #======================================================================= def scp_files(env, local, remote): try: - pipe = subprocess.Popen( ['scp -r {0} {1}'.format(local, remote)], env=env, shell=True) + pipe = subprocess.Popen( ['scp -r {0} {1}'.format(local, remote)], shell=True) pipe.wait() return True - except OSError as e: + except Exception as e: print('copy_html WARNING: scp command failed with error:') print(' {0} - {1}'.format(e.errno, e.strerror)) return False @@ -280,7 +255,12 @@ def read_paths(env, comp_data): for line in lines: values = line.split(':') if 'copied' not in values[-1].lower(): - env[values[-2]] = values[-1] + if values[-2] not in env.keys(): + env[values[-2]] = [values[-1]] + else: + env[values[-2]].append(values[-1]) + else: + env[values[-2]] = [] return env @@ -331,9 +311,10 @@ def copy_files(env, comp, comp_data): if comp != 'ocn': for diag_type, key in comp_data.iteritems(): # check if the diag_type key string that points to the local webdir is empty or not - if key in env: - if len(env[key]) > 0: - local = env[key] + if key in env.keys(): + for diag_dir in env[key]: + if len(diag_dir) > 0: + local = diag_dir if not os.path.isdir(local): print('copy_html WARNING: local directory = {0} does not exists.'.format(local)) else: @@ -343,202 +324,55 @@ def copy_files(env, comp, comp_data): print(' You will need to copy the files manually') else: # ocean need to create a tar file first - if os.path.isdir(env['OCNDIAG_WEBDIR']): - ok_to_copy = True - rootdir, workdir = os.path.split(env['OCNDIAG_WEBDIR']) - - # fix for when there is a / at the end of the path - if len(workdir) == 0: - rootdir, workdir = os.path.split(rootdir) - - tarfile = 'ocn{0}-{1}.tar.gz'.format(env['OCNDIAG_YEAR0'], env['OCNDIAG_YEAR1']) - cwd = os.getcwd() - os.chdir(rootdir) - if os.path.isfile(os.path.join(rootdir,tarfile)): - print('copy_html WARNING: ocean tar file = {0} already exists - please delete first.'.format(os.path.join(rootdir,tarfile))) - ok_to_copy = False - else: - tar_cmd = "tar cvfz {0} --exclude='*.nc' --exclude='*.nc_tmp' --exclude='*.tmp' --exclude='*.log.*' --exclude='*.asc' --exclude='*.ncl' --exclude='*.dt.*' {1}".format(tarfile, workdir) + for diag_dir in env['OCNDIAG_WEBDIR']: + if os.path.isdir(diag_dir): + ok_to_copy = True + rootdir, workdir = os.path.split(diag_dir) + + # fix for when there is a / at the end of the path + if len(workdir) == 
0: + rootdir, workdir = os.path.split(rootdir) + + # parse the workdir for years + diag_parts = workdir.split('.')[-1].split('-') + year0 = diag_parts[0] + year1 = diag_parts[1] + + tarfile = 'ocn{0}-{1}.tar.gz'.format(year0, year1) + cwd = os.getcwd() + os.chdir(rootdir) + if os.path.isfile(os.path.join(rootdir,tarfile)): + print('copy_html WARNING: ocean tar file = {0} already exists - please delete first.'.format(os.path.join(rootdir,tarfile))) + ok_to_copy = False + else: + tar_cmd = "tar cvfz {0} --exclude='*.nc' --exclude='*.nc_tmp' --exclude='*.tmp' --exclude='*.log.*' --exclude='*.asc' --exclude='*.ncl' --exclude='*.dt.*' {1}".format(tarfile, workdir) try: - pipe = subprocess.Popen([tar_cmd], env=env, shell=True) + pipe = subprocess.Popen([tar_cmd], shell=True) pipe.wait() - except OSError as e: + except Exception as e: print('copy_html WARNING: unable to execute tar command {0}'.format(tar_cmd)) - print(' You will need to copy the files in {0} manually to a web server.'.format(env['OCNDIAG_WEBDIR'])) + print(' You will need to copy the files in {0} manually to a web server.'.format(diag_dir)) print(' {0} - {1}'.format(e.returncode, e.output)) ok_to_copy = False - if ok_to_copy: - if scp_files(env, tarfile, remote): - # untar the file on remote server - ok_to_remove = True - try: - pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; tar xvfz {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: unable to untar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) - print(' You will need to untar files manually') - ok_to_remove = False - if ok_to_remove: - # remove the tar file on the remote server - try: - pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; rm {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: unable to remove tar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) - os.chdir(cwd) - - -#======================================================================= -# create a main index page and copy it over to the remote server top level -#======================================================================= -def create_index(env, compList, activeList, comp_lookup): - """ create a main index.html page """ - - comp_casenames = {'atm' : {'model':'ATMDIAG_test_casename', 'control':'ATMDIAG_cntl_casename'}, - 'ice' : {'model':'ICEDIAG_CASE_TO_CONT', 'control':'ICEDIAG_CASE_TO_DIFF'}, - 'lnd' : {'model':'LNDDIAG_caseid_1', 'control':'LNDDIAG_caseid_2'}, - 'ocn' : {'model':'CASE', 'control':'OCNDIAG_CNTRLCASE'}} - - diag_dict = dict() - comp_data = dict() - link_dict = dict() - ocn_link_dict = dict() - - for comp in compList: - if comp in activeList: - # create a section for links to the active component - (model_start_year, model_stop_year, control_start_year, control_stop_year, \ - trends_start_year1, trends_stop_year1, trends_start_year2, trends_stop_year2) = get_years(env, comp) - # load up the diag_dict to be passed to the template with the case names and years - comp_data = comp_casenames[comp] - model = env[comp_data['model']] - control = env[comp_data['control']] - - # load the diag dict with template variables - diag_dict[comp] = 
{'model':model, 'model_start_year':model_start_year, 'model_stop_year':model_stop_year, \ - 'trends_start_year1':trends_start_year1, 'trends_stop_year1':trends_stop_year1, \ - 'control':control, 'control_start_year':control_start_year, 'control_stop_year':control_stop_year, \ - 'trends_start_year2':trends_start_year2, 'trends_stop_year2':trends_stop_year2} - - # get the remote relative links - comp_data = comp_lookup[comp] - if comp in ['atm', 'lnd']: - for diag_type, key in comp_data.iteritems(): - if key in env: - if len(env[key]) > 0: - root, diag_path = os.path.split(env[key]) - # fix for when there is a / at the end of the path - if len(diag_path) == 0: - root, diag_path = os.path.split(root) - local_diag_path = diag_path - if comp == 'lnd': - local_diag_path = '{0}/setsIndex.html'.format(diag_path) - link_dict[diag_type] = local_diag_path - else: - link_dict[diag_type] = None - diag_dict[comp].update(link_dict) - - elif comp == 'ice': - for diag_type, key in comp_data.iteritems(): - if key in env: - if len(env[key]) > 0: - root, diag_path = os.path.split(env[key]) - # fix for when there is a / at the end of the path - if len(diag_path) == 0: - root, diag_path = os.path.split(root) - local_diag_path = '{0}/yrs{1}-{2}/'.format(diag_path, env['ICEDIAG_BEGYR_CONT'], env['ICEDIAG_ENDYR_CONT']) - link_dict[diag_type] = local_diag_path - else: - link_dict[diag_type] = None - diag_dict[comp].update(link_dict) - - elif comp == 'ocn': - ocn_diag_types = {'OCNDIAG_MODEL_VS_OBS':('MODEL_VS_OBS','{0} (years {1}-{2}) - Observations'.format(model, model_start_year, model_stop_year)), \ - 'OCNDIAG_MODEL_VS_OBS_ECOSYS':('MODEL_VS_OBS_ECOSYS','{0} (years {1}-{2}) - Observations w/ ecosystem'.format(model, model_start_year, model_stop_year)), \ - 'OCNDIAG_MODEL_VS_CONTROL':('MODEL_VS_CONTROL_{0}'.format(control),'{0} (years {1}-{2}) - {3} (years {4}-{5})'.format(model, model_start_year, model_stop_year, control, control_start_year, control_stop_year)), \ - 'OCNDIAG_MODEL_VS_CONTROL_ECOSYS':('MODEL_VS_CONTROL_ECOSYS_{0}'.format(control),'{0} (years {1}-{2}) - {3} (years {4}-{5}) w/ ecosystem'.format(model, model_start_year, model_stop_year, control, control_start_year, control_stop_year)), \ - 'OCNDIAG_MODEL_TIMESERIES':('MODEL_TIMESERIES','{0} Timeseries (years {1}-{2})'.format(model, trends_start_year1, trends_stop_year1)), \ - 'OCNDIAG_MODEL_TIMESERIES_ECOSYS':('MODEL_TIMESERIES_ECOSYS','{0} Timeseries w/ ecosystem (years {1}-{2})'.format(model, trends_start_year1, trends_stop_year1))} - - for diag_type, key in comp_data.iteritems(): - if key in env: - if len(env[key]) > 0: - root, diag_path = os.path.split(env[key]) - # fix for when there is a / at the end of the path - if len(diag_path) == 0: - root, diag_path = os.path.split(root) - for ocn_diag_type, link_list in ocn_diag_types.iteritems(): - if env[ocn_diag_type].upper() in ['T','TRUE']: - local_diag_path = '{0}/{1}'.format(diag_path, link_list[0]) - ocn_link_dict[ocn_diag_type] = (local_diag_path, link_list[1]) - else: - ocn_link_dict[ocn_diag_type] = None - else: - ocn_link_dict[ocn_diag_type] = None - - # create the jinja template - templatePath = '{0}/Templates'.format(env['POSTPROCESS_PATH']) - - templateLoader = jinja2.FileSystemLoader( searchpath=templatePath ) - templateEnv = jinja2.Environment( loader=templateLoader ) - - template_file = 'diagnostics.tmpl' - template = templateEnv.get_template( template_file ) - - # get the current datatime string for the template and filename - now = datetime.datetime.now().strftime('%Y-%m-%d 
%H:%M:%S') - index_now = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') - - # set the template variables - templateVars = { 'casename' : env['CASE'], - 'tagname' : env['CESM_TAG'], - 'username' : env['USER_NAME'], - 'diag_dict' : collections.OrderedDict(sorted(diag_dict.items())), - 'ocn_link_dict': ocn_link_dict, - 'today': now, - } - - # write the main index.html page to the top working directory - main_html = template.render( templateVars ) - workdir = '{0}/{1}'.format(env['PP_CASE_PATH'],'html_files') - if not os.path.exists(workdir): - os.makedirs(workdir) - - with open( '{0}/index.{1}.html'.format(workdir, index_now), 'w') as index: - index.write(main_html) - - # copy the and style sheet to the top level - remote = '{0}@{1}:{2}'.format(env['GLOBAL_WEBLOGIN'], env['GLOBAL_WEBHOST'], env['GLOBAL_REMOTE_WEBDIR']) - localdir = '{0}/Templates/'.format(env['POSTPROCESS_PATH']) - - local = '{0}/*.css'.format(localdir) - try: - pipe = subprocess.Popen( ['scp {0} {1}'.format(local, remote)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: scp command failed with error::') - print(' {0} - {1}'.format(e.errno, e.strerror)) - - # copy the top-level index.html - local = '{0}/index.{1}.html'.format(workdir, index_now) - try: - pipe = subprocess.Popen( ['scp {0} {1}'.format(local, remote)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: scp command failed with error:') - print(' {0} - {1}'.format(e.errno, e.strerror)) - - # copy the logos to the sub-dir - remote_logos = '{0}@{1}:{2}/logos'.format(env['GLOBAL_WEBLOGIN'], env['GLOBAL_WEBHOST'], env['GLOBAL_REMOTE_WEBDIR']) - local = '{0}/logos/*.*'.format(localdir) - try: - pipe = subprocess.Popen( ['scp {0} {1}'.format(local, remote_logos)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: scp command failed with error::') - print(' {0} - {1}'.format(e.errno, e.strerror)) - - + if ok_to_copy: + if scp_files(env, tarfile, remote): + # untar the file on remote server + ok_to_remove = True + try: + pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; tar xvfz {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], shell=True) + pipe.wait() + except Exception as e: + print('copy_html WARNING: unable to untar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) + print(' You will need to untar files manually') + ok_to_remove = False + if ok_to_remove: + # remove the tar file on the remote server + try: + pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; rm {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], shell=True) + pipe.wait() + except Exception as e: + print('copy_html WARNING: unable to remove tar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) + os.chdir(cwd) #======================================================================= # main @@ -589,9 +423,6 @@ def main(options): activeList.append(comp) update_web_dirs(env, comp_data) - # build a single web page to link to all the different components - create_index(env, compList, activeList, comp_lookup) - #=================================== if __name__ == "__main__":
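For orientation, the sketch below mirrors the list-valued bookkeeping introduced by the read_paths() change above: one XML key can now carry several diagnostic web directories, and a key whose entry ends in "copied" is reset to an empty list. The key name OCNDIAG_WEBDIR and the year parsing come from the patch; the function name, in-memory lines, and example paths are illustrative only.

def read_paths_sketch(env, lines):
    # each bookkeeping line looks like 'KEY:path', or ends in 'copied' once
    # that directory has already been published to the web server
    for line in lines:
        values = line.split(':')
        if 'copied' not in values[-1].lower():
            if values[-2] not in env.keys():
                env[values[-2]] = [values[-1]]          # first diag set for this key
            else:
                env[values[-2]].append(values[-1])      # additional diag sets
        else:
            env[values[-2]] = []                        # nothing left to copy
    return env

env = read_paths_sketch({}, ['OCNDIAG_WEBDIR:/glade/scratch/user/ocn.351-355',
                             'OCNDIAG_WEBDIR:/glade/scratch/user/ocn.356-360'])
# copy_files() then loops over env['OCNDIAG_WEBDIR'] and, for the ocean component,
# parses the trailing 'year0-year1' out of each directory name to build the tar
# file name before copying it to the remote web host.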