diff --git a/Config/config_postprocess.xml b/Config/config_postprocess.xml index ac881a49..446c75de 100644 --- a/Config/config_postprocess.xml +++ b/Config/config_postprocess.xml @@ -43,12 +43,12 @@ desc="post processing directory location on local machine where cesm-env2 python virtualenv is located." > - + + 64 + 64 + srun + + f2py + + ifort + -c -g -O2 + -I/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/include + -L/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/lib -lnetcdff -lnetcdf + + + module purge + + + module load python/2.7.14 + module load intel/17.0.1 + module load ncarenv + module load ncarcompilers + module load impi + module load netcdf/4.6.1 + module load nco/4.7.4 + module load ncl/6.4.0 + + + + 32 + 16 + 6 + /glade/p/cesm/amwg/amwg_data + + + 32 + 4 + /glade/p/cesm/pcwg/ice/data + + + 32 + 12 + 6 + /glade/p/cesm/lmwg/diag/lnd_diag_data + + + 32 + 16 + /glade/p/cesm/ + + + 2 + 1 + /glade/p/cesm/lmwg_dev/oleson/ILAMB/ILAMB_all + + + 2 + 1 + /glade/p/cesm/omwg/obs_data/IOMB + + + + 64 64 srun - /glade/apps/opt/python/2.7.7/gnu-westmere/4.8.2/lib/python2.7/site-packages + f2py ifort -c -g -O2 - -I/glade/apps/opt/netcdf/4.3.3.1/intel/16.0.0/include - -L/glade/apps/opt/netcdf/4.3.3.1/intel/16.0.0/lib -lnetcdff -lnetcdf + -I/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/include + -L/glade/u/apps/dav/opt/netcdf/4.6.1/intel/17.0.1/lib -lnetcdff -lnetcdf module purge - module load python/2.7.7 - module load intel/16.0.3 + module load python/2.7.14 + module load intel/17.0.1 module load ncarenv - module load ncarbinlibs module load ncarcompilers - module load slurm/17 - module load impi/5.1.1.109 - module load numpy/1.11.0 - module load scipy/0.18.1 - module load mpi4py/2.0.0-impi - module load pynio/1.4.1 - module load pyside/1.1.2 - module load matplotlib/1.5.1 - module load netcdf/4.3.0 - module load nco/4.4.4 - module load netcdf4python/1.2.4 + module load impi + module load netcdf/4.6.1 + module load nco/4.7.4 module load ncl/6.4.0 - module load pyngl/1.4.0 @@ -82,29 +133,21 @@ ifort -c -g -O2 - -I/glade/u/apps/ch/opt/netcdf/4.4.1.1/intel/16.0.3/include - -L/glade/u/apps/ch/opt/netcdf/4.4.1.1/intel/16.0.3/lib -lnetcdff -lnetcdf + -I/glade/u/apps/ch/opt/netcdf/4.6.1/intel/17.0.1/include + -L/glade/u/apps/ch/opt/netcdf/4.6.1/intel/17.0.1/lib -lnetcdff -lnetcdf module purge - module load ncarenv/1.2 - module load intel/17.0.1 - module load ncarcompilers/0.4.1 - module load mpt/2.15f - module load python/2.7.13 - module load numpy/1.12.0 - module load scipy/0.18.1 - module load mpi4py/2.0.0-mpt - module load pynio/1.4.1 - module load matplotlib/2.0.0 - module load netcdf/4.4.1.1 - module load nco/4.6.2 - module load netcdf4-python/1.2.7 - module load cf_units/1.1.3 + module load python/2.7.14 + module load intel/17.0.1 + module load ncarenv + module load ncarcompilers + module load mpt/2.15f + module load netcdf/4.6.1 + module load nco/4.7.4 module load ncl/6.4.0 - module load pyngl/1.5.0b diff --git a/Machines/machine_postprocess.xsd b/Machines/machine_postprocess.xsd index 3b4c8f0f..d2765b71 100644 --- a/Machines/machine_postprocess.xsd +++ b/Machines/machine_postprocess.xsd @@ -28,7 +28,7 @@ - + @@ -42,7 +42,7 @@ - + @@ -56,7 +56,7 @@ - + @@ -70,7 +70,7 @@ - + @@ -84,7 +84,7 @@ - + diff --git a/Makefile b/Makefile index 81764c37..4f842b11 100644 --- a/Makefile +++ b/Makefile @@ -26,8 +26,8 @@ SUBDIRS = \ timeseries \ conformer \ conform \ - diagnostics \ ilamb \ + diagnostics # MAKECMDGOALS is the make option: make 'clobber' or 'all' TARGET = $(MAKECMDGOALS) diff --git 
a/Templates/batch_cheyenne.tmpl b/Templates/batch_cheyenne.tmpl index 4d1d8041..3bfb6d65 100644 --- a/Templates/batch_cheyenne.tmpl +++ b/Templates/batch_cheyenne.tmpl @@ -1,13 +1,13 @@ +#!/bin/bash + #PBS -N {{ processName }} #PBS -q {{ queue }} #PBS -l select={{ nodes }}:ncpus={{ ppn }}:mpiprocs={{ ppn }} #PBS -l walltime={{ wallclock }} #PBS -A {{ project }} -. /glade/u/apps/ch/opt/lmod/7.2.1/lmod/lmod/init/bash +source /etc/profile.d/modules.sh -export I_MPI_DEVICE=rdma export MPI_UNBUFFERED_STDIO=true export TMPDIR=$TMPDIR -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/glade/u/apps/ch/opt/pythonpkgs/2.7/cf_units/1.1.3/gnu/6.2.0/lib diff --git a/Templates/batch_dav.tmpl b/Templates/batch_dav.tmpl new file mode 100644 index 00000000..54a6f5e0 --- /dev/null +++ b/Templates/batch_dav.tmpl @@ -0,0 +1,13 @@ +#! /bin/bash -l + +#SBATCH -n {{ pes }} +#SBATCH -N {{ nodes }} +#SBATCH --ntasks-per-node={{ ppn }} +#SBATCH -t {{ wallclock }} +#SBATCH -p dav +#SBATCH -J {{ processName }} +#SBATCH -A {{ project }} +#SBATCH --mem {{ memory }} +#SBATCH -e {{ processName }}.err.%J +#SBATCH -o {{ processName }}.out.%J +#SBATCH -m block diff --git a/Templates/batch_geyser.tmpl b/Templates/batch_geyser.tmpl index 4f0b7287..54a6f5e0 100644 --- a/Templates/batch_geyser.tmpl +++ b/Templates/batch_geyser.tmpl @@ -1,3 +1,5 @@ +#! /bin/bash -l + #SBATCH -n {{ pes }} #SBATCH -N {{ nodes }} #SBATCH --ntasks-per-node={{ ppn }} @@ -5,11 +7,7 @@ #SBATCH -p dav #SBATCH -J {{ processName }} #SBATCH -A {{ project }} -#SBATCH -C {{ queue }} #SBATCH --mem {{ memory }} #SBATCH -e {{ processName }}.err.%J #SBATCH -o {{ processName }}.out.%J - -source /glade/u/apps/opt/slurm_init/init.sh - -export LD_LIBRARY_PATH=/glade/apps/opt/netcdf/4.3.0/gnu/default/lib:$LD_LIBRARY_PATH +#SBATCH -m block diff --git a/Templates/cylc_batch_dav.tmpl b/Templates/cylc_batch_dav.tmpl new file mode 100644 index 00000000..05c709b1 --- /dev/null +++ b/Templates/cylc_batch_dav.tmpl @@ -0,0 +1,12 @@ +-n {{ pes }} +-N {{ nodes }} +--ntasks-per-node={{ ppn }} +-t {{ wallclock }} +-p dav +-J {{ processName }} +-A {{ project }} +-C {{ queue }} +-e {{ processName }}.err.%J +-o {{ processName }}.out.%J +--mem {{ memory }} +-m block diff --git a/Templates/cylc_batch_geyser.tmpl b/Templates/cylc_batch_geyser.tmpl index fc72703f..05c709b1 100644 --- a/Templates/cylc_batch_geyser.tmpl +++ b/Templates/cylc_batch_geyser.tmpl @@ -9,3 +9,4 @@ -e {{ processName }}.err.%J -o {{ processName }}.out.%J --mem {{ memory }} +-m block diff --git a/Templates/iconform.tmpl b/Templates/iconform.tmpl index 63690122..46898013 100644 --- a/Templates/iconform.tmpl +++ b/Templates/iconform.tmpl @@ -13,7 +13,7 @@ if [ ! -e {{ virtualEnvDir }} ]; then echo "CESM {{ processName }} exiting due to non-existant python virtual environment in" echo " {{ virtualEnvDir }}" echo "You must first run:" - echo "$SRCROOT/postprocessing/create_python_env.sh -machine [machine]" + echo "$SRCROOT/postprocessing/create_python_env -machine [machine]" echo "*************************************************************************************" exit fi diff --git a/Templates/postprocess.tmpl b/Templates/postprocess.tmpl index f7b5caa1..63d8caf4 100644 --- a/Templates/postprocess.tmpl +++ b/Templates/postprocess.tmpl @@ -1,4 +1,5 @@ -#! /usr/bin/env bash +{{ batchdirectives }} + ########## ## ## See https://github.com/NCAR/CESM_postprocessing/wiki for details @@ -6,45 +7,33 @@ ## ########## -{{ batchdirectives }} - if [ ! 
-e {{ virtualEnvDir }} ]; then echo "*************************************************************************************" echo "CESM {{ processName }} exiting due to non-existant python virtual environment in" echo " {{ virtualEnvDir }}" echo "You must first run:" - echo "$SRCROOT/postprocessing/create_python_env.sh -machine [machine]" + echo "$POSTPROCESS_PATH/create_python_env -machine [machine]" echo "*************************************************************************************" exit fi -## NOTE: the module load order and when the python virtualenv is activated is IMPORTANT! -## Purging the modules first clears all environment variables that might have been set -## by the virtualenv activation. Consequently, in order to ensure a correct environment -## we must activate the virtualenv *after* the purge. - -## 1. purge and load the default system modules that the virtualenv was built with - {% for module in reset_modules %} {{ module }} {% endfor %} -## 2. check the processName for ocn_diagnostics_geyser and set the OCNDIAG_DIAGROOTPATH -## to point to the geyser virtualenv in order for the correct za compiled tool - -{% if "ocn_diagnostics_geyser" in processName %} -pp_geyser_path=`./pp_config --get POSTPROCESS_PATH_GEYSER --value` -./pp_config --set OCNDIAG_DIAGROOTPATH=$pp_geyser_path/ocn_diag +{% if "ocn_diagnostics_dav" in processName %} +pp_dav_path=`./pp_config --get POSTPROCESS_PATH_DAV --value` +./pp_config --set OCNDIAG_DIAGROOTPATH=$pp_dav_path/ocn_diag {% endif %} -## 3. activate the virtualenv that contains all the non-bootstrapped dependencies +## activate the virtualenv that contains all the non-bootstrapped dependencies cd {{ virtualEnvDir }} echo "Running from virtualenv directory:" pwd . activate -## 4. load the boot-strap modules +## load the boot-strap modules {% for module in modules %} {{ module }} diff --git a/Templates/xconform.tmpl b/Templates/xconform.tmpl index d73e1565..4430bbd0 100644 --- a/Templates/xconform.tmpl +++ b/Templates/xconform.tmpl @@ -13,7 +13,7 @@ if [ ! 
-e {{ virtualEnvDir }} ]; then echo "CESM {{ processName }} exiting due to non-existant python virtual environment in" echo " {{ virtualEnvDir }}" echo "You must first run:" - echo "$SRCROOT/postprocessing/create_python_env.sh -machine [machine]" + echo "$SRCROOT/postprocessing/create_python_env -machine [machine]" echo "*************************************************************************************" exit fi diff --git a/Tools/copy_html b/Tools/copy_html index 35660681..d292b0a9 100755 --- a/Tools/copy_html +++ b/Tools/copy_html @@ -17,15 +17,9 @@ if sys.hexversion < 0x02070000: # built-in modules # import argparse -import collections -import datetime import errno import glob import os -import platform -import pprint -import re -import shutil import subprocess import traceback @@ -55,9 +49,6 @@ if hasattr(sys, 'real_prefix'): try: import cesm_utils except: - # - # activate the virtual environment that was created by create_python_env.sh - # activate_file = '{0}/cesm-env2/bin/activate_this.py'.format(postprocess_path) if not os.path.isfile(activate_file): err_msg = ('copy_html ERROR: the virtual environment in {0} does not exist.'.format(postprocess_path) \ @@ -69,9 +60,6 @@ if hasattr(sys, 'real_prefix'): except: raise OSError('copy_html ERROR: Unable to activate python virtualenv {0}'.format(activate_file)) else: - # - # activate the virtual environment that was created by create_python_env.sh - # activate_file = '{0}/cesm-env2/bin/activate_this.py'.format(postprocess_path) if not os.path.isfile(activate_file): err_msg = ('copy_html ERROR: the virtual environment in {0} does not exist.'.format(postprocess_path) \ @@ -88,9 +76,6 @@ if sys.version_info[0] == 2: else: from configparser import ConfigParser as config_parser -# -# import modules installed in the virtual environment -# from cesm_utils import cesmEnvLib import jinja2 @@ -112,6 +97,15 @@ def commandline_options(): parser.add_argument('-debug', '--debug', nargs=1, required=False, type=int, default=0, help='debugging verbosity level output: 0 = none, 1 = minimum, 2 = maximum. 0 is default') + parser.add_argument('--use-ssh-key', dest='use_ssh_key', action='store_true', + help='Use an ssh key to connect to the remote web host defined by '\ + 'XML variables "GLOBAL_WEBHOST" and "GLOBAL_WEBLOGIN". '\ + 'If an ssh key is not present, then this option will cause '\ + 'execution to stop as opposed to issuing a warning '\ + 'and prompting for a password multiple times. 
'\ + 'More details about how to create ssh keys is available at '\ + '"http://tools.cgd.ucar.edu/make_user_ssh_keys/index.html"') + options = parser.parse_args() return options @@ -197,7 +191,7 @@ def get_years(env, comp): #======================================================================= # check_ssh_key #======================================================================= -def check_ssh_key(env): +def check_ssh_key(env, use_ssh_key): # check if ssh key is set for passwordless access to the web host try: @@ -205,8 +199,12 @@ def check_ssh_key(env): stderr=subprocess.STDOUT, shell=True) except subprocess.CalledProcessError as e: - print('WARNING: unable to connect to remote web host {0}@{1} without a password'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'])) - print(' You will be prompted for a password multiple times in order to copy the files.') + if use_ssh_key: + print('ERROR: unable to connect to remote web host {0}@{1} without a password'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'])) + sys.exit(1) + else: + print('WARNING: unable to connect to remote web host {0}@{1} without a password'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'])) + print(' You will be prompted for a password multiple times in order to copy the files.') #======================================================================= @@ -216,33 +214,23 @@ def create_top_level(env, comp): # make sure top level remote directory exists try: - pipe = subprocess.Popen( ["ssh {0}@{1} 'mkdir -p {2}/{3}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)], env=env, shell=True) + pipe = subprocess.Popen( ["ssh {0}@{1} 'mkdir -p {2}/{3}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)], shell=True) pipe.wait() - except OSEerror as e: + except Exception as e: print('ERROR: unable to create remote directory {0}@{1}:{2}/{3}'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) print(' {0} - {1}'.format(e.errno, e.strerror)) sys.exit(1) - # create the logos subdir - try: - pipe = subprocess.Popen( ["ssh {0}@{1} 'mkdir -p {2}/logos'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'])], env=env, shell=True) - pipe.wait() - except OSEerror as e: - print('ERROR: unable to create remote directory {0}@{1}:{2}/logos'.format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'])) - print(' {0} - {1}'.format(e.errno, e.strerror)) - sys.exit(1) - - #======================================================================= # scp_files - scp files to a remote server #======================================================================= def scp_files(env, local, remote): try: - pipe = subprocess.Popen( ['scp -r {0} {1}'.format(local, remote)], env=env, shell=True) + pipe = subprocess.Popen( ['scp -r {0} {1}'.format(local, remote)], shell=True) pipe.wait() return True - except OSError as e: + except Exception as e: print('copy_html WARNING: scp command failed with error:') print(' {0} - {1}'.format(e.errno, e.strerror)) return False @@ -267,7 +255,12 @@ def read_paths(env, comp_data): for line in lines: values = line.split(':') if 'copied' not in values[-1].lower(): - env[values[-2]] = values[-1] + if values[-2] not in env.keys(): + env[values[-2]] = [values[-1]] + else: + env[values[-2]].append(values[-1]) + else: + env[values[-2]] = [] return env @@ -318,9 +311,10 @@ def copy_files(env, comp, comp_data): if comp != 'ocn': for diag_type, key in comp_data.iteritems(): 
# check if the diag_type key string that points to the local webdir is empty or not - if key in env: - if len(env[key]) > 0: - local = env[key] + if key in env.keys(): + for diag_dir in env[key]: + if len(diag_dir) > 0: + local = diag_dir if not os.path.isdir(local): print('copy_html WARNING: local directory = {0} does not exists.'.format(local)) else: @@ -330,202 +324,55 @@ def copy_files(env, comp, comp_data): print(' You will need to copy the files manually') else: # ocean need to create a tar file first - if os.path.isdir(env['OCNDIAG_WEBDIR']): - ok_to_copy = True - rootdir, workdir = os.path.split(env['OCNDIAG_WEBDIR']) - - # fix for when there is a / at the end of the path - if len(workdir) == 0: - rootdir, workdir = os.path.split(rootdir) - - tarfile = 'ocn{0}-{1}.tar.gz'.format(env['OCNDIAG_YEAR0'], env['OCNDIAG_YEAR1']) - cwd = os.getcwd() - os.chdir(rootdir) - if os.path.isfile(os.path.join(rootdir,tarfile)): - print('copy_html WARNING: ocean tar file = {0} already exists - please delete first.'.format(os.path.join(rootdir,tarfile))) - ok_to_copy = False - else: - tar_cmd = "tar cvfz {0} --exclude='*.nc' --exclude='*.nc_tmp' --exclude='*.tmp' --exclude='*.log.*' --exclude='*.asc' --exclude='*.ncl' --exclude='*.dt.*' {1}".format(tarfile, workdir) + for diag_dir in env['OCNDIAG_WEBDIR']: + if os.path.isdir(diag_dir): + ok_to_copy = True + rootdir, workdir = os.path.split(diag_dir) + + # fix for when there is a / at the end of the path + if len(workdir) == 0: + rootdir, workdir = os.path.split(rootdir) + + # parse the workdir for years + diag_parts = workdir.split('.')[-1].split('-') + year0 = diag_parts[0] + year1 = diag_parts[1] + + tarfile = 'ocn{0}-{1}.tar.gz'.format(year0, year1) + cwd = os.getcwd() + os.chdir(rootdir) + if os.path.isfile(os.path.join(rootdir,tarfile)): + print('copy_html WARNING: ocean tar file = {0} already exists - please delete first.'.format(os.path.join(rootdir,tarfile))) + ok_to_copy = False + else: + tar_cmd = "tar cvfz {0} --exclude='*.nc' --exclude='*.nc_tmp' --exclude='*.tmp' --exclude='*.log.*' --exclude='*.asc' --exclude='*.ncl' --exclude='*.dt.*' {1}".format(tarfile, workdir) try: - pipe = subprocess.Popen([tar_cmd], env=env, shell=True) + pipe = subprocess.Popen([tar_cmd], shell=True) pipe.wait() - except OSError as e: + except Exception as e: print('copy_html WARNING: unable to execute tar command {0}'.format(tar_cmd)) - print(' You will need to copy the files in {0} manually to a web server.'.format(env['OCNDIAG_WEBDIR'])) + print(' You will need to copy the files in {0} manually to a web server.'.format(diag_dir)) print(' {0} - {1}'.format(e.returncode, e.output)) ok_to_copy = False - if ok_to_copy: - if scp_files(env, tarfile, remote): - # untar the file on remote server - ok_to_remove = True - try: - pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; tar xvfz {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: unable to untar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) - print(' You will need to untar files manually') - ok_to_remove = False - if ok_to_remove: - # remove the tar file on the remote server - try: - pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; rm {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], env=env, shell=True) - pipe.wait() - except 
OSError as e: - print('copy_html WARNING: unable to remove tar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) - os.chdir(cwd) - - -#======================================================================= -# create a main index page and copy it over to the remote server top level -#======================================================================= -def create_index(env, compList, activeList, comp_lookup): - """ create a main index.html page """ - - comp_casenames = {'atm' : {'model':'ATMDIAG_test_casename', 'control':'ATMDIAG_cntl_casename'}, - 'ice' : {'model':'ICEDIAG_CASE_TO_CONT', 'control':'ICEDIAG_CASE_TO_DIFF'}, - 'lnd' : {'model':'LNDDIAG_caseid_1', 'control':'LNDDIAG_caseid_2'}, - 'ocn' : {'model':'CASE', 'control':'OCNDIAG_CNTRLCASE'}} - - diag_dict = dict() - comp_data = dict() - link_dict = dict() - ocn_link_dict = dict() - - for comp in compList: - if comp in activeList: - # create a section for links to the active component - (model_start_year, model_stop_year, control_start_year, control_stop_year, \ - trends_start_year1, trends_stop_year1, trends_start_year2, trends_stop_year2) = get_years(env, comp) - # load up the diag_dict to be passed to the template with the case names and years - comp_data = comp_casenames[comp] - model = env[comp_data['model']] - control = env[comp_data['control']] - - # load the diag dict with template variables - diag_dict[comp] = {'model':model, 'model_start_year':model_start_year, 'model_stop_year':model_stop_year, \ - 'trends_start_year1':trends_start_year1, 'trends_stop_year1':trends_stop_year1, \ - 'control':control, 'control_start_year':control_start_year, 'control_stop_year':control_stop_year, \ - 'trends_start_year2':trends_start_year2, 'trends_stop_year2':trends_stop_year2} - - # get the remote relative links - comp_data = comp_lookup[comp] - if comp in ['atm', 'lnd']: - for diag_type, key in comp_data.iteritems(): - if key in env: - if len(env[key]) > 0: - root, diag_path = os.path.split(env[key]) - # fix for when there is a / at the end of the path - if len(diag_path) == 0: - root, diag_path = os.path.split(root) - local_diag_path = diag_path - if comp == 'lnd': - local_diag_path = '{0}/setsIndex.html'.format(diag_path) - link_dict[diag_type] = local_diag_path - else: - link_dict[diag_type] = None - diag_dict[comp].update(link_dict) - - elif comp == 'ice': - for diag_type, key in comp_data.iteritems(): - if key in env: - if len(env[key]) > 0: - root, diag_path = os.path.split(env[key]) - # fix for when there is a / at the end of the path - if len(diag_path) == 0: - root, diag_path = os.path.split(root) - local_diag_path = '{0}/yrs{1}-{2}/'.format(diag_path, env['ICEDIAG_BEGYR_CONT'], env['ICEDIAG_ENDYR_CONT']) - link_dict[diag_type] = local_diag_path - else: - link_dict[diag_type] = None - diag_dict[comp].update(link_dict) - - elif comp == 'ocn': - ocn_diag_types = {'OCNDIAG_MODEL_VS_OBS':('MODEL_VS_OBS','{0} (years {1}-{2}) - Observations'.format(model, model_start_year, model_stop_year)), \ - 'OCNDIAG_MODEL_VS_OBS_ECOSYS':('MODEL_VS_OBS_ECOSYS','{0} (years {1}-{2}) - Observations w/ ecosystem'.format(model, model_start_year, model_stop_year)), \ - 'OCNDIAG_MODEL_VS_CONTROL':('MODEL_VS_CONTROL_{0}'.format(control),'{0} (years {1}-{2}) - {3} (years {4}-{5})'.format(model, model_start_year, model_stop_year, control, control_start_year, control_stop_year)), \ - 'OCNDIAG_MODEL_VS_CONTROL_ECOSYS':('MODEL_VS_CONTROL_ECOSYS_{0}'.format(control),'{0} 
(years {1}-{2}) - {3} (years {4}-{5}) w/ ecosystem'.format(model, model_start_year, model_stop_year, control, control_start_year, control_stop_year)), \ - 'OCNDIAG_MODEL_TIMESERIES':('MODEL_TIMESERIES','{0} Timeseries (years {1}-{2})'.format(model, trends_start_year1, trends_stop_year1)), \ - 'OCNDIAG_MODEL_TIMESERIES_ECOSYS':('MODEL_TIMESERIES_ECOSYS','{0} Timeseries w/ ecosystem (years {1}-{2})'.format(model, trends_start_year1, trends_stop_year1))} - - for diag_type, key in comp_data.iteritems(): - if key in env: - if len(env[key]) > 0: - root, diag_path = os.path.split(env[key]) - # fix for when there is a / at the end of the path - if len(diag_path) == 0: - root, diag_path = os.path.split(root) - for ocn_diag_type, link_list in ocn_diag_types.iteritems(): - if env[ocn_diag_type].upper() in ['T','TRUE']: - local_diag_path = '{0}/{1}'.format(diag_path, link_list[0]) - ocn_link_dict[ocn_diag_type] = (local_diag_path, link_list[1]) - else: - ocn_link_dict[ocn_diag_type] = None - else: - ocn_link_dict[ocn_diag_type] = None - - # create the jinja template - templatePath = '{0}/Templates'.format(env['POSTPROCESS_PATH']) - - templateLoader = jinja2.FileSystemLoader( searchpath=templatePath ) - templateEnv = jinja2.Environment( loader=templateLoader ) - - template_file = 'diagnostics.tmpl' - template = templateEnv.get_template( template_file ) - - # get the current datatime string for the template and filename - now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - index_now = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') - - # set the template variables - templateVars = { 'casename' : env['CASE'], - 'tagname' : env['CESM_TAG'], - 'username' : env['USER_NAME'], - 'diag_dict' : collections.OrderedDict(sorted(diag_dict.items())), - 'ocn_link_dict': ocn_link_dict, - 'today': now, - } - - # write the main index.html page to the top working directory - main_html = template.render( templateVars ) - workdir = '{0}/{1}'.format(env['PP_CASE_PATH'],'html_files') - if not os.path.exists(workdir): - os.makedirs(workdir) - - with open( '{0}/index.{1}.html'.format(workdir, index_now), 'w') as index: - index.write(main_html) - - # copy the and style sheet to the top level - remote = '{0}@{1}:{2}'.format(env['GLOBAL_WEBLOGIN'], env['GLOBAL_WEBHOST'], env['GLOBAL_REMOTE_WEBDIR']) - localdir = '{0}/Templates/'.format(env['POSTPROCESS_PATH']) - - local = '{0}/*.css'.format(localdir) - try: - pipe = subprocess.Popen( ['scp {0} {1}'.format(local, remote)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: scp command failed with error::') - print(' {0} - {1}'.format(e.errno, e.strerror)) - - # copy the top-level index.html - local = '{0}/index.{1}.html'.format(workdir, index_now) - try: - pipe = subprocess.Popen( ['scp {0} {1}'.format(local, remote)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: scp command failed with error:') - print(' {0} - {1}'.format(e.errno, e.strerror)) - - # copy the logos to the sub-dir - remote_logos = '{0}@{1}:{2}/logos'.format(env['GLOBAL_WEBLOGIN'], env['GLOBAL_WEBHOST'], env['GLOBAL_REMOTE_WEBDIR']) - local = '{0}/logos/*.*'.format(localdir) - try: - pipe = subprocess.Popen( ['scp {0} {1}'.format(local, remote_logos)], env=env, shell=True) - pipe.wait() - except OSError as e: - print('copy_html WARNING: scp command failed with error::') - print(' {0} - {1}'.format(e.errno, e.strerror)) - - + if ok_to_copy: + if scp_files(env, tarfile, remote): + # untar the file on remote server + ok_to_remove = 
True + try: + pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; tar xvfz {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], shell=True) + pipe.wait() + except Exception as e: + print('copy_html WARNING: unable to untar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) + print(' You will need to untar files manually') + ok_to_remove = False + if ok_to_remove: + # remove the tar file on the remote server + try: + pipe = subprocess.Popen(["ssh {0}@{1} 'cd {2}/{3} ; rm {4}'".format(env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp,tarfile)], shell=True) + pipe.wait() + except Exception as e: + print('copy_html WARNING: unable to remove tar file {0} on remote server {1}@{2}:{3}/{4}'.format(tarfile, env['GLOBAL_WEBLOGIN'],env['GLOBAL_WEBHOST'],env['GLOBAL_REMOTE_WEBDIR'],comp)) + os.chdir(cwd) #======================================================================= # main @@ -559,8 +406,8 @@ def main(options): # load the env with all the env file entries env = cesmEnvLib.readXML(pp_caseroot, envFileList) - # check if sshkey is set - check_ssh_key(env) + # check if ssh key is set + check_ssh_key(env, options.use_ssh_key) # copy the different diag component web files for comp in compList: @@ -576,9 +423,6 @@ def main(options): activeList.append(comp) update_web_dirs(env, comp_data) - # build a single web page to link to all the different components - create_index(env, compList, activeList, comp_lookup) - #=================================== if __name__ == "__main__": diff --git a/Tools/pp_config b/Tools/pp_config index 3e20cfca..90dd1ed6 100755 --- a/Tools/pp_config +++ b/Tools/pp_config @@ -93,7 +93,7 @@ import jinja2 # global variables _scripts = ['timeseries','averages','regrid','diagnostics','xconform'] -_machines = ['cheyenne','edison','geyser'] +_machines = ['cheyenne','edison','dav'] _comps = ['atm','ice','lnd','ocn'] # ------------------------------------------------------------------------------- diff --git a/Tools/ration.log b/Tools/ration.log index 247a75d7..06a39061 100644 --- a/Tools/ration.log +++ b/Tools/ration.log @@ -1,64 +1,31 @@ -Execute poe command line: poe ./ration_example.py -0:0/4: Sent 0 -1:1/4: Recvd 0 -1:1/4: Recvd 1 -0:0/4: Sent 1 -0:0/4: Sent 2 -1:1/4: Recvd 2 -0:0/4: Sent 3 -1:1/4: Recvd 3 -0:0/4: Sent 4 -1:1/4: Recvd 4 -0:0/4: Sent 5 -1:1/4: Recvd 5 -0:0/4: Sent 6 -1:1/4: Recvd 6 -0:0/4: Sent 7 -1:1/4: Recvd 7 -0:0/4: Sent 8 -1:1/4: Recvd 8 -0:0/4: Sent 9 -1:1/4: Recvd 9 -0:0/4: Sent None -1:1/4: Recvd None -1:1/4: Out of loop -0:0/4: Sent None -0:0/4: Sent None -0:0/4: Out of loop -2:2/4: Recvd None -2:2/4: Out of loop -3:3/4: Recvd None -3:3/4: Out of loop -0:Done. -Execute poe command line: poe ./ration_example.py -0:0/4: Sent 0 -1:1/4: Recvd 0 -1:1/4: Recvd 1 -0:0/4: Sent 1 -0:0/4: Sent 2 -1:1/4: Recvd 2 -0:0/4: Sent 3 -1:1/4: Recvd 3 -0:0/4: Sent 4 -1:1/4: Recvd 4 -0:0/4: Sent 5 -0:0/4: Sent 6 -1:1/4: Recvd 6 -0:0/4: Sent 7 -2:2/4: Recvd 5 -0:0/4: Sent 8 -1:1/4: Recvd 8 -3:3/4: Recvd 7 -0:0/4: Sent 9 -2:2/4: Recvd 9 -0:0/4: Sent None -1:1/4: Recvd None -1:1/4: Out of loop -0:0/4: Sent None -3:3/4: Recvd None -3:3/4: Out of loop -0:0/4: Sent None -0:0/4: Out of loop -2:2/4: Recvd None -2:2/4: Out of loop -0:Done. 
+2/4: Recvd 0 +2/4: Recvd 3 +2/4: Recvd 6 +2/4: Recvd 9 +2/4: Recvd None +2/4: Out of loop +0/4: Sent 0 +0/4: Sent 1 +0/4: Sent 2 +0/4: Sent 3 +0/4: Sent 4 +0/4: Sent 5 +0/4: Sent 6 +0/4: Sent 7 +0/4: Sent 8 +0/4: Sent 9 +0/4: Sent None +0/4: Sent None +0/4: Sent None +0/4: Out of loop +Done. +1/4: Recvd 1 +1/4: Recvd 4 +1/4: Recvd 7 +1/4: Recvd None +1/4: Out of loop +3/4: Recvd 2 +3/4: Recvd 5 +3/4: Recvd 8 +3/4: Recvd None +3/4: Out of loop diff --git a/Tools/ration_script_geyser b/Tools/ration_script_geyser new file mode 100755 index 00000000..95da5860 --- /dev/null +++ b/Tools/ration_script_geyser @@ -0,0 +1,34 @@ +#!/bin/bash -l + +## test the mpi4py and ASAPPyTools utility on geyser with ncar_pylib virtualenv + +#SBATCH -t 00:05:00 +#SBATCH -n 4 +#SBATCH -N 2 +#SBATCH --ntasks-per-node=2 +#SBATCH -p dav +#SBATCH -J ration_test +#SBATCH -A P93300606 +#SBATCH -C geyser +#SBATCH --mem 1G +#SBATCH -e ration_test.err.%J +#SBATCH -o ration_test.out.%J + +export MP_LABELIO=yes + +module load python/2.7.14 + +. /glade2/work/aliceb/sandboxes/dev/postprocessing_geyser/cesm-env2/bin/activate + +srun ./ration_test.py >> ./ration.log + +status=$? +echo $status + +deactivate + +echo $status + + + + diff --git a/Tools/ration_example.py b/Tools/ration_test.py similarity index 100% rename from Tools/ration_example.py rename to Tools/ration_test.py diff --git a/averager/pp_tests/serial_ice_slice.py b/averager/pp_tests/serial_ice_slice.py new file mode 100755 index 00000000..946ddf60 --- /dev/null +++ b/averager/pp_tests/serial_ice_slice.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +from pyaverager import PyAverager, specification, PreProc +import os + +#### User modify #### + +in_dir='/glade/scratch/aliceb/b.e21.B1850.f09_g17.CMIP6-piControl.001/ice/hist/' +out_dir= '/glade/scratch/aliceb/b.e21.B1850.f09_g17.CMIP6-piControl.001/ice/slice/' +pref= 'b.e21.B1850.f09_g17.CMIP6-piControl.001.cice.h' +htype= 'slice' +average= ['ya:355','jfm:351:355'] +wght= False +ncfrmt = 'netcdf' +serial=True + +suffix= 'nc' +date_pattern= 'yyyymm-yyyymm' +clobber = True + +ice_obs_file = '/glade/p/cesm/omwg/grids/gx1v7_grid.nc' +reg_file ='/glade/p/cesm/pcwg/ice/data/REGION_MASK_gx1v7.nc' +year0 = 351 +year1 = 355 +ncl_location = '/glade/work/aliceb/sandboxes/dev/postprocessing_new/ice_diag//code/' + +#### End user modify #### + +pyAveSpecifier = specification.create_specifier(in_directory=in_dir, + out_directory=out_dir, + prefix=pref, + suffix=suffix, + date_pattern=date_pattern, + hist_type=htype, + avg_list=average, + weighted=wght, + ncformat=ncfrmt, + serial=serial, + ice_obs_file=ice_obs_file, + reg_file=reg_file, + year0=year0, + year1=year1, + clobber=clobber, + ncl_location=ncl_location) + +PreProc.run_pre_proc(pyAveSpecifier) +PyAverager.run_pyAverager(pyAveSpecifier) + diff --git a/averager/pyAverager/README b/averager/pyAverager/README index 8f26f3ce..0667e498 100644 --- a/averager/pyAverager/README +++ b/averager/pyAverager/README @@ -1,4 +1,4 @@ -February 14, 2018 +September 6, 2018 ====================================== @@ -9,7 +9,7 @@ PyAverager A package used for computing averages from climate model output. 
Authors: Sheri Mickelson, Kevin Paul, and John Dennis -Version: 0.9.15 +Version: 0.9.16 Copyright: Contained within LICENSE.txt Comments and feedback: mickelso@ucar.edu diff --git a/averager/pyAverager/pyaverager/PreProc.py b/averager/pyAverager/pyaverager/PreProc.py index 4bae22b6..e3f6435d 100644 --- a/averager/pyAverager/pyaverager/PreProc.py +++ b/averager/pyAverager/pyaverager/PreProc.py @@ -18,34 +18,6 @@ def __init__(self,spec): self.create_pre_proc(spec) - def read_obs(self,obs_file,tarea,tlong,tlat): - - ''' - Read in the ice observation file to get area, lat, and lon values. - - @param obs_file The observation file to pull values from. - - @param tarea The variable name for tarea. - - @param tlong The variable name for tlong. - - @param tlat The vaiable name for tlat. - - @return lat A pointer to lat. - - @return lon A pointer to lon. - - @reutrn area The values for area. - - ''' - - file_hndl = Nio.open_file(obs_file,'r') - lat = file_hndl.variables[tlat] - lon = file_hndl.variables[tlong] - area = file_hndl.variables[tarea] - area = area[:]*1.0e-4 - return lat,lon,area - def read_reg_mask(self,reg_file,reg_name): ''' @@ -197,7 +169,13 @@ def create_pre_proc(self,spec): tarea = 'TAREA' tlong = 'TLONG' tlat = 'TLAT' - o_lat,o_lon,o_area = self.read_obs(obs_file,tarea,tlong,tlat) + + # Read in the ice observation file to get area, lat, and lon values. + obs_file_hndl = Nio.open_file(obs_file,'r') + o_lat = obs_file_hndl.variables[tlat] + o_lon = obs_file_hndl.variables[tlong] + o_area = obs_file_hndl.variables[tarea] + o_area = o_area[:]*1.0e-4 # If using time series files, open the variable's file now if (spec.hist_type == 'series'): diff --git a/averager/pyAverager/pyaverager/__init__.py b/averager/pyAverager/pyaverager/__init__.py index 186989d7..a484b342 100644 --- a/averager/pyAverager/pyaverager/__init__.py +++ b/averager/pyAverager/pyaverager/__init__.py @@ -2,5 +2,5 @@ import PyAverager, specification, PreProc -__version__ = "0.9.15" +__version__ = "0.9.16" diff --git a/averager/pyAverager/setup.py b/averager/pyAverager/setup.py index 19ed3616..81cc573e 100644 --- a/averager/pyAverager/setup.py +++ b/averager/pyAverager/setup.py @@ -3,7 +3,7 @@ from distutils.core import setup setup(name='PyAverager', - version='0.9.15', + version='0.9.16', description='Parallel Python Averager for Climate Data', author='Sheri Mickelson', author_email='mickelso@ucar.edu', diff --git a/cesm_utils/cesm_utils/cesmEnvLib.py b/cesm_utils/cesm_utils/cesmEnvLib.py index c2d7f039..c99d5b24 100755 --- a/cesm_utils/cesm_utils/cesmEnvLib.py +++ b/cesm_utils/cesm_utils/cesmEnvLib.py @@ -203,6 +203,8 @@ def get_hostname(): index = hostname.find(".") if index > 0: hostname = hostname[0:index] + else: + hostname = re.split('(\d+)',hostname)[0] return hostname diff --git a/cesm_utils/cesm_utils/create_postprocess b/cesm_utils/cesm_utils/create_postprocess index 7d8cb817..4ba8749a 100755 --- a/cesm_utils/cesm_utils/create_postprocess +++ b/cesm_utils/cesm_utils/create_postprocess @@ -27,6 +27,7 @@ if sys.hexversion < 0x02070000: import argparse import collections import errno +import getpass import itertools import os import platform @@ -57,7 +58,7 @@ try: except KeyError: err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH environment variable.' 
\ ' For example on cheyenne: setenv POSTPROCESS_PATH /glade/p/cesm/postprocessing_ch' \ - ' In addition, for geyser support: setenv POSTPROCESS_PATH_GEYSER /glade/p/cesm/postprocessing_geyser') + ' In addition, for NCAR DAV support use the --add-dav command line option.') raise OSError(err_msg) cesm_pp_path = os.environ["POSTPROCESS_PATH"] @@ -129,6 +130,18 @@ def commandline_options(): parser.add_argument('-username', '--username', nargs=1, required=False, help='User name (optional). Defaults to user login name.') + parser.add_argument('-add-dav', '--add-dav', dest='add_dav', nargs=1, required=False, + help='Fully qualified path to the root of the CESM postprocessing ' \ + 'virtualenv for the NCAR DAV cluster. This option sets the XML ' \ + 'variable POSTPROCESS_PATH_DAV in env_postprocess.xml and ' \ + 'creates all the necessary postprocessing batch scripts for the ' \ + 'NCAR DAV Slurm manager. This option is only available when create_postprocess ' \ + 'is run on NCAR machine cheyenne. A set of batch submission scripts in the ' \ + 'postprocessing caseroot with the "_dav" extension are included along side '\ + 'the cheyenne PBS submission scripts. '\ + 'Example: /glade/p/cesm/postprocessing_dav. '\ + 'Defaults to "undefined". (optional)') + options = parser.parse_args() return options @@ -270,7 +283,9 @@ def read_machine_xml(machineName, xmlFile): # get the timeseries pes first tseries_pes = xmlmachine.find('timeseries_pes') machine['timeseries_pes'] = tseries_pes.text - machine['timeseries_queue'] = tseries_pes.get('queue').lower() + machine['timeseries_queue'] = '' + if 'queue' in tseries_pes.attrib: + machine['timeseries_queue'] = tseries_pes.get('queue').lower() machine['timeseries_ppn'] = tseries_pes.get('pes_per_node').lower() machine['timeseries_wallclock'] = tseries_pes.get('wallclock').lower() machine['timeseries_nodes'] = '' @@ -284,7 +299,9 @@ def read_machine_xml(machineName, xmlFile): # get the conform pes xconform_pes = xmlmachine.find('xconform_pes') machine['xconform_pes'] = xconform_pes.text - machine['conform_queue'] = xconform_pes.get('queue').lower() + machine['conform_queue'] = '' + if 'queue' in xconform_pes.attrib: + machine['conform_queue'] = xconform_pes.get('queue').lower() machine['conform_ppn'] = xconform_pes.get('pes_per_node').lower() machine['conform_wallclock'] = xconform_pes.get('wallclock').lower() machine['conform_nodes'] = '' @@ -318,7 +335,9 @@ def read_machine_xml(machineName, xmlFile): avg = comp.find('averages_pes') if avg is not None: machine['{0}_averages_pes'.format(compName)] = avg.text - machine['{0}_averages_queue'.format(compName)] = avg.get('queue').lower() + machine['{0}_averages_queue'.format(compName)] = '' + if 'queue' in avg.attrib: + machine['{0}_averages_queue'.format(compName)] = avg.get('queue').lower() machine['{0}_averages_ppn'.format(compName)] = avg.get('pes_per_node').lower() machine['{0}_averages_wallclock'.format(compName)] = avg.get('wallclock').lower() machine['{0}_averages_nodes'.format(compName)] = '' @@ -332,7 +351,9 @@ def read_machine_xml(machineName, xmlFile): diags = comp.find('diagnostics_pes') if diags is not None: machine['{0}_diagnostics_pes'.format(compName)] = diags.text - machine['{0}_diagnostics_queue'.format(compName)] = diags.get('queue').lower() + machine['{0}_diagnostics_queue'.format(compName)] = '' + if 'queue' in diags.attrib: + machine['{0}_diagnostics_queue'.format(compName)] = diags.get('queue').lower() machine['{0}_diagnostics_ppn'.format(compName)] = diags.get('pes_per_node').lower() 
machine['{0}_diagnostics_wallclock'.format(compName)] = diags.get('wallclock').lower() machine['{0}_diagnostics_nodes'.format(compName)] = '' @@ -345,7 +366,9 @@ def read_machine_xml(machineName, xmlFile): init = comp.find('initialize_pes') if init is not None: machine['{0}_initialize_pes'.format(compName)] = init.text - machine['{0}_initialize_queue'.format(compName)] = init.get('queue').lower() + machine['{0}_initialize_queue'.format(compName)] = '' + if 'queue' in init.attrib: + machine['{0}_initialize_queue'.format(compName)] = init.get('queue').lower() machine['{0}_initialize_ppn'.format(compName)] = init.get('pes_per_node').lower() machine['{0}_initialize_wallclock'.format(compName)] = init.get('wallclock').lower() machine['{0}_initialize_nodes'.format(compName)] = '' @@ -358,7 +381,9 @@ def read_machine_xml(machineName, xmlFile): regrid = comp.find('regrid_pes') if regrid is not None: machine['{0}_regrid_pes'.format(compName)] = regrid.text - machine['{0}_regrid_queue'.format(compName)] = regrid.get('queue').lower() + machine['{0}_regrid_queue'.format(compName)] = '' + if 'queue' in regrid.attrib: + machine['{0}_regrid_queue'.format(compName)] = regrid.get('queue').lower() machine['{0}_regrid_ppn'.format(compName)] = regrid.get('pes_per_node').lower() machine['{0}_regrid_wallclock'.format(compName)] = regrid.get('wallclock').lower() machine['{0}_regrid_nodes'.format(compName)] = '' @@ -560,10 +585,17 @@ def initialize_main(envDict, options, standalone): envDict['PROJECT'] = options.project[0] # set the user name - envDict['USER_NAME'] = os.getlogin() + envDict['USER_NAME'] = getpass.getuser() if options.username: envDict['USER_NAME'] = options.username[0] + # set the POSTPROCESS_PATH_DAV if option add-dav is specified + envDict['POSTPROCESS_PATH_DAV'] = 'undefined' + if options.add_dav: + # check to make sure virtualenv exists + if os.path.isfile('{0}/cesm-env2/bin/activate_this.py'.format(options.add_dav[0])): + envDict['POSTPROCESS_PATH_DAV'] = options.add_dav[0] + return envDict # ------------------------------------------------------------------------------- @@ -607,15 +639,6 @@ def main(options): if not envDict['MACH']: raise OSError('create_postprocess ERROR: hostname "{0}" is not currently supported. Exiting...'.format(hostname)) - # check if env POSTPROCESS_PATH_GEYSER needs to be set - if (envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'geyser'): - try: - envDict["POSTPROCESS_PATH_GEYSER"] = os.environ["POSTPROCESS_PATH_GEYSER"] - except KeyError: - err_msg = ('create_postprocess ERROR: please set the POSTPROCESS_PATH_GEYSER environment variable.' 
\ - ' For example, setenv POSTPROCESS_PATH_GEYSER /glade/p/cesm/postprocessing_geyser') - raise OSError(err_msg) - # make the appropriate dirs in the caseroot try: os.mkdir(pp_case_path) @@ -874,16 +897,16 @@ def main(options): imb_env_vars='{% for env in imb_env_vars %}\n{{ env }}\n{% endfor %}', imb_options='{{ imb_options }}') - # check if machine is cheyenne then create a set of geyser submission scripts - if envDict['MACH'] == 'cheyenne': - hostname = 'geyser' + # check if machine is cheyenne then create a set of dav submission scripts + if envDict['MACH'] == 'cheyenne' and envDict['POSTPROCESS_PATH_DAV'] != 'undefined': + hostname = 'dav' envDict['MACH'] = cesmEnvLib.get_machine_name(hostname, '{0}/Machines/machine_postprocess.xml'.format(envDict['POSTPROCESS_PATH'])) - pp_geyser = os.environ["POSTPROCESS_PATH_GEYSER"] + pp_dav = envDict["POSTPROCESS_PATH_DAV"] # get the machine dependent variables, modules and mpi run command in a dictionary machine = dict() machine = read_machine_xml(machineName=envDict['MACH'], - xmlFile='{0}/Machines/machine_postprocess.xml'.format(pp_geyser)) + xmlFile='{0}/Machines/machine_postprocess.xml'.format(pp_dav)) # define the template files for the batch scripts batch_tmpl = 'batch_{0}.tmpl'.format(envDict['MACH']) @@ -891,9 +914,9 @@ def main(options): # generate the timeseries batch submit script from template files postProcessCmd = 'cesm_tseries_generator.py' - processName = 'timeseries_geyser' + processName = 'timeseries_dav' outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'],processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['timeseries_pes'], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -913,10 +936,10 @@ def main(options): # generate the xconform batch submit script from template files postProcessCmd = 'cesm_conform_generator.py' - processName = 'xconform_geyser' + processName = 'xconform_dav' outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'],processName) xconform_tmpl = 'xconform.tmpl' - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['xconform_pes'], batchTmpl=batch_tmpl, runTmpl=xconform_tmpl, postProcessCmd=postProcessCmd, @@ -936,10 +959,10 @@ def main(options): # generate the iconform batch submit script from template files postProcessCmd = 'cesm_conform_initialize.py' - processName = 'iconform_geyser' + processName = 'iconform_dav' outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'],processName) iconform_tmpl = 'iconform.tmpl' - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['xconform_pes'], batchTmpl=batch_tmpl, runTmpl=iconform_tmpl, postProcessCmd=postProcessCmd, @@ -961,9 +984,9 @@ def main(options): for comp in compList: # generate the averages batch submit script postProcessCmd = '{0}_avg_generator.py'.format(comp) - processName = '{0}_averages_geyser'.format(comp) + processName = '{0}_averages_dav'.format(comp) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['{0}_averages_pes'.format(comp)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -983,9 +1006,9 @@ def main(options): # generate the diagnostics batch submit script postProcessCmd = '{0}_diags_generator.py'.format(comp) - processName = '{0}_diagnostics_geyser'.format(comp) + processName = '{0}_diagnostics_dav'.format(comp) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, 
pes=machine['{0}_diagnostics_pes'.format(comp)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -1007,9 +1030,9 @@ def main(options): for comp in regridList: # generate the regrid batch submit script postProcessCmd = '{0}_regrid_generator.py'.format(comp) - processName = '{0}_regrid_geyser'.format(comp) + processName = '{0}_regrid_dav'.format(comp) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(pp_geyser, + create_batch(pp_dav, pes=machine['{0}_regrid_pes'.format(comp)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -1031,9 +1054,9 @@ def main(options): for imb in imbList: # generate the serial script that sets up the imb config file. postProcessCmd = 'imb_initialize.py' - processName = '{0}_initialize_geyser'.format(imb) + processName = '{0}_initialize_dav'.format(imb) outFile = '{0}/{1}'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['{0}_initialize_pes'.format(imb)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, @@ -1061,9 +1084,9 @@ def main(options): # variables and run imb_initialize multiple times, each time # write it out to imb_diagnostics. postProcessCmd = 'imb_diags_generator.py' - processName = '{0}_diagnostics_geyser'.format(imb) + processName = '{0}_diagnostics_dav'.format(imb) outFile = '{0}/{1}.tmpl'.format(envDict['PP_CASE_PATH'], processName) - create_batch(ppDir=pp_geyser, + create_batch(ppDir=pp_dav, pes=machine['{0}_diagnostics_pes'.format(imb)], batchTmpl=batch_tmpl, runTmpl=run_tmpl, postProcessCmd=postProcessCmd, diff --git a/conform/conform/__init__.py b/conform/conform/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/create_python_env b/create_python_env index 6a8f306e..c4033164 100755 --- a/create_python_env +++ b/create_python_env @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash -l # # script to setup the python virtual environment for postprocessing # @@ -58,6 +58,7 @@ progname=`basename $0` # need absolute path (rather than relative path) because we use this # path to get to the machines directory pp_dir=$(absolute_path `dirname $0`) +export pp_dir #---------------------------------------------------------------------- # Set default return values @@ -136,14 +137,12 @@ fi #---------------------------------------------------------------------- env="${pp_dir}/cesm-env2" echo $env -if [ -f $env ]; then +if [ ! -d $env ]; then status="ERROR" - info="$progname - ${pp_dir}/cesm-env2 virtual environment already exists. -It is only necessary to create the virtual environment once for a given machine. -All post processing scripts residing in a CASE directory will activate and deactivate -the virtual environment as necessary. + info="$progname - ${pp_dir}/cesm-env2 virtual environment does not exist. +Check the $module_script for the correct ncar_pylib virtualenv clone command. +If a new or updated virtual environment needs to be created then follow these steps: -If a new or updated virtual environment needs to be created then following these steps: >cd ${pp_dir} >make clobber >make clobber-env @@ -161,15 +160,17 @@ cd $pp_dir # create the virtual environment. Makefile checks to see if it is # already setup, so only done once per case. #---------------------------------------------------------------------- -echo "$progname - making virtual environment in ${pp_dir}/cesm-env2." -make env -if [ $? 
-ne 0 ]; then - echo "ERROR: Unable to create virtual environment in ${pp_dir}/cesm-env2. Exiting..." - exit 1 -fi +##echo "$progname - making virtual environment in ${pp_dir}/cesm-env2." +##if [ ${machine} != geyser ]; then +## make env +## if [ $? -ne 0 ]; then +## echo "ERROR: Unable to create virtual environment in ${pp_dir}/cesm-env2. Exiting..." +## exit 1 +## fi +##fi #---------------------------------------------------------------------- -# activate it for this script +# activate virtualenv for remainder of this script #---------------------------------------------------------------------- echo "$progname - activating virtual environment in ${pp_dir}/cesm-env2." . cesm-env2/bin/activate @@ -202,14 +203,14 @@ fi #---------------------------------------------------------------------- # compile and install ocn_diag remap shared object (remap.so) program #---------------------------------------------------------------------- -echo "---------------------------------------------------------" -echo "$progname - Compiling ocn diagnostics remap.so" +##echo "---------------------------------------------------------" +##echo "$progname - Compiling ocn diagnostics remap.so" # reads XML and call subprocess f2py -create_f2py_remap --machine $machine -if [ $? -ne 0 ]; then - echo "WARNING: Problem with ocean diagnostics create_f2py_remap in $pp_dir" -fi +##create_f2py_remap --machine $machine +##if [ $? -ne 0 ]; then +## echo "WARNING: Problem with ocean diagnostics create_f2py_remap in $pp_dir" +##fi #---------------------------------------------------------------------- # compile and install ocn_diag zonal average (za) program diff --git a/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml b/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml index 3fde67f3..f45f1751 100644 --- a/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml +++ b/diagnostics/diagnostics/imb/Config/config_diags_iomb.xml @@ -43,12 +43,12 @@ desc="matplotlib backend for generating graphics, should be exported to the environment!" 
> - diff --git a/diagnostics/diagnostics/imb/imb_initialize.py b/diagnostics/diagnostics/imb/imb_initialize.py index a5397b54..8d217a82 100755 --- a/diagnostics/diagnostics/imb/imb_initialize.py +++ b/diagnostics/diagnostics/imb/imb_initialize.py @@ -175,7 +175,9 @@ def expand_batch_vars(envDict, imb_name): except: raise RuntimeError('CLI_OPTIONS must be specified in the imb env xml file.') - diag_root = "{0}_ROOT".format(imb_name.upper()) +## diag_root = "{0}_ROOT".format(imb_name.upper()) + # The ROOT env var should always be set to ILAMB_ROOT regardless of whether running ILAMB or IOMB + diag_root = "ILAMB_ROOT" env_vars = [] env_vars.append("export {0}={1}".format('MPLBACKEND', envDict['MPLBACKEND'])) env_vars.append("export {0}={1}".format(diag_root, envDict[diag_root])) @@ -204,9 +206,9 @@ def expand_batch_vars(envDict, imb_name): print(' {0} - {1}'.format(e.cmd, e.output)) - # create a template and batch for geyser slurm - if envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'geyser': - hostname = 'geyser' + # create a template and batch for DAV slurm + if envDict['MACH'] == 'cheyenne' or envDict['MACH'] == 'dav': + hostname = 'dav' template_filename = '{0}_diagnostics_{1}.tmpl'.format(imb_name, hostname) templateLoader = jinja2.FileSystemLoader( searchpath='{0}'.format(envDict["CASEROOT"]) ) templateEnv = jinja2.Environment( loader=templateLoader ) diff --git a/ilamb/ilamb/README.rst b/ilamb/ilamb/README.rst index 86120b54..ebbe75ee 100644 --- a/ilamb/ilamb/README.rst +++ b/ilamb/ilamb/README.rst @@ -31,27 +31,38 @@ Useful Information * `CLM `_ - land comparison against 3 CLM versions and 2 forcings * `CMIP5 `_ - land comparison against a collection of CMIP5 models * `IOMB `_ - ocean comparison against a few ocean models - + +* Paper `preprint `_ which + details the design and methodology employed in the ILAMB package * If you find the package or the ouput helpful in your research or development efforts, we kindly ask you to cite the following reference (DOI:10.18139/ILAMB.v002.00/1251621). -ILAMB 2.2 Release +ILAMB 2.3 Release ----------------- -We are pleased to announce version 2.2 of the ILAMB python package. Among many small bugfixes and enhancements, the new version contains the following new features: - -* A new installed command ``ilamb-fetch`` has been included which can be run to automatically download the observational datasets. Running this command after the data has been downloaded will check your collection for updates and consistency. -* A new installed command ``ilamb-doctor`` has been included which can be run with options similar to ``ilamb-run`` to help identify which values a particular configure file needs in order to run. -* ILAMB will now check the spatial extents of all the models present in the current run and clip away to the largest shared extent. This allows ILAMB to be applied to regional models. -* User-defined regions can now be added at runtime either by specifying latitude/longitude bounds, or a mask in a netCDF4 file. For specifics, consult the regions `tutorial `_. -* Added a runoff and evaporative fraction benchmark to the ILAMB canon, removed the GFED3 and GFED4 burned area data products. -* Added many more plots to the generic output including the RMSE and the score maps. -* The ILAMB core has been enhanced to better handle depths. This has enabled ocean comparisons among others. -* An initial collection of ocean datasets has been assembled in the ``demo/iomb.cfg`` file for ocean benchmarking. 
-* The plotting phase of ``ilamb-run`` may now be skipped with the ``--skip_plots`` option. -* Relationship overall scores are now available in an image on the main html output page. -* Additional `tutorials `_ have been added to explain these new features. +We are pleased to announce version 2.3 of the ILAMB python +package. Among many bugfixes and improvements we highlight these major +changes: + +* You may observe a large shift in some score values. In this version + we solidified our scoring methodology while writing a `paper + `_ which necessitated + reworking some of the scores. For details, see the linked paper. +* Made a memory optimization pass through the analysis routines. Peak + memory usage and the time at peak were reduced, improving performance. +* Restructured the symbolic manipulation of derived variables to + greatly reduce the required memory. +* Moved from using cfunits to cf_units. Both are python wrappers + around the UDUNITS library, but cfunits is stagnant and placed an + upper limit to the version of the netCDF4 python wrappers we could + use. +* The scoring of the interannual variability was missed in the port + from version 1 to 2; we have added the metric. +* The terrestrial water storage anomaly GRACE metric was changed to + compare mean anomaly values over large river basins. For details see + the ILAMB paper. + Funding ------- diff --git a/ilamb/ilamb/bin/ilamb-run b/ilamb/ilamb/bin/ilamb-run index a98e05fd..03f7b9d5 100644 --- a/ilamb/ilamb/bin/ilamb-run +++ b/ilamb/ilamb/bin/ilamb-run @@ -378,6 +378,7 @@ def WorkPost(M,C,W,S,verbose=False,skip_plots=False): print (" {0:>%d} {1:<%d} %s%s%s" % (maxCL,maxML,FAIL,ex.__class__.__name__,ENDC)).format(c.longname,m.name) sys.stdout.flush() + comm.Barrier() for c in C: if not skip_plots: try: @@ -476,7 +477,8 @@ parser.add_argument('--model_setup', dest="model_setup", type=str, nargs='+',def help='list files model setup information') parser.add_argument('--skip_plots', dest="skip_plots", action="store_true", help='enable to skip the plotting phase') - +parser.add_argument('--rel_only', dest="rel_only", action="store_true", + help='enable to only display relative differences in overall scores') args = parser.parse_args() if args.config is None: if rank == 0: @@ -505,7 +507,8 @@ S = Scoreboard(args.config[0], master = rank==0, verbose = not args.quiet, build_dir = args.build_dir[0], - extents = RestrictiveModelExtents(M)) + extents = RestrictiveModelExtents(M), + rel_only = args.rel_only) C = MatchRelationshipConfrontation(S.list()) Cf = FilterConfrontationList(C,args.confront) @@ -520,7 +523,7 @@ if args.logging: if rank == 0: logger.info(" " + " ".join(os.uname())) - for key in ["ILAMB","numpy","matplotlib","netCDF4","cfunits","sympy","mpi4py"]: + for key in ["ILAMB","numpy","matplotlib","netCDF4","cf_units","sympy","mpi4py"]: pkg = __import__(key) try: path = pkg.__path__[0] diff --git a/ilamb/ilamb/bin/ilamb-table b/ilamb/ilamb/bin/ilamb-table new file mode 100644 index 00000000..374b6abb --- /dev/null +++ b/ilamb/ilamb/bin/ilamb-table @@ -0,0 +1,65 @@ +#!/usr/bin/env python +""" +""" +from ILAMB.Scoreboard import Scoreboard +from netCDF4 import Dataset +import os,argparse,sys + +parser = argparse.ArgumentParser(description=__doc__) +parser.add_argument('--config', dest="config", metavar='config', type=str, nargs=1, + help='path to configuration file to use') +parser.add_argument('--build_dir', dest="build_dir", metavar='build_dir', type=str, nargs=1,default=["./_build"], + help='path of where to save the output') 
+parser.add_argument('--csv_file', dest="csv", metavar='csv', type=str, nargs=1,default=["table.csv"], + help='destination filename for the table') + +args = parser.parse_args() +if args.config is None: + print "\nError: You must specify a configuration file using the option --config\n" + sys.exit(1) + +S = Scoreboard(args.config[0],verbose=False,build_dir=args.build_dir[0]) + +region = "global" +scalar = "RMSE" +sname = "%s %s" % (scalar,region) +group = "MeanState" +table = {} +unit = {} +for c in S.list(): + for subdir, dirs, files in os.walk(c.output_path): + for fname in files: + if not fname.endswith(".nc"): continue + with Dataset(os.path.join(c.output_path,fname)) as dset: + if group not in dset.groups .keys(): continue + if "scalars" not in dset.groups[group].groups.keys(): continue + grp = dset.groups[group]["scalars"] + if sname not in grp.variables.keys(): continue + var = grp.variables[sname] + if not table.has_key(c.longname): + table[c.longname] = {} + unit [c.longname] = var.units + table[c.longname][dset.name] = var[...] + +# What models have data? +models = [] +for key in table.keys(): + for m in table[key].keys(): + if m not in models: models.append(m) +models.sort() + +# render a table of values in csv format +lines = ",".join(["Name","Units"] + models) +for c in S.list(): + if not table.has_key(c.longname): continue + line = "%s,%s" % (c.longname,unit[c.longname]) + for m in models: + if table[c.longname].has_key(m): + line += ",%g" % (table[c.longname][m]) + else: + line += "," + lines += "\n%s" % line + +with file(args.csv[0],mode="w") as f: + f.write(lines) + diff --git a/ilamb/ilamb/demo/ilamb.cfg b/ilamb/ilamb/demo/ilamb.cfg index 83692eb2..793227fd 100644 --- a/ilamb/ilamb/demo/ilamb.cfg +++ b/ilamb/ilamb/demo/ilamb.cfg @@ -284,6 +284,7 @@ skip_iav = True [h2: Terrestrial Water Storage Anomaly] variable = "twsa" alternate_vars = "tws" +derived = "pr-evspsbl-mrro" cmap = "Blues" weight = 5 ctype = "ConfTWSA" @@ -292,6 +293,31 @@ ctype = "ConfTWSA" source = "DATA/twsa/GRACE/twsa_0.5x0.5.nc" weight = 25 +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Snow Water Equivalent] +variable = "swe" +alternate_vars = "snw" +cmap = "Blues" +weight = 5 + +[CanSISE] +source = "DATA/swe/CanSISE/swe.nc" +weight = 25 + +#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +[h2: Permafrost] +variable = "tsl" + +[NSIDC] +ctype = "ConfPermafrost" +source = "DATA/permafrost/NSIDC/NSIDC_0.5x0.5.nc" +y0 = 1970. +yf = 2000. 
+Teps = 273.15 +dmax = 3.5 + ########################################################################### [h1: Radiation and Energy Cycle] diff --git a/ilamb/ilamb/doc/install.rst b/ilamb/ilamb/doc/install.rst index 7ccfaaf9..84a85043 100644 --- a/ilamb/ilamb/doc/install.rst +++ b/ilamb/ilamb/doc/install.rst @@ -25,7 +25,7 @@ include: * netCDF4_, a python/numpy interface to the netCDF C library (you must have the C library installed) * sympy_, a python library for symbolic mathematics * mpi4py_, a python wrapper around the MPI library (you must have a MPI implementation installed) -* cfunits_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) +* cf_units_, a python interface to UNIDATA’s Udunits-2 library with CF extensions (you must have the Udunits library installed) I have designated that a few of these dependencies are python interfaces to C libraries and so the library must also be installed @@ -271,7 +271,7 @@ Next open the local copy of the file with a editor and search for .. _numpy: https://www.numpy.org/ .. _matplotlib: https://matplotlib.org/ .. _netCDF4: https://github.com/Unidata/netcdf4-python -.. _cfunits: https://bitbucket.org/cfpython/cfunits-python/ +.. _cf_units: https://github.com/SciTools/cf-units .. _basemap: https://github.com/matplotlib/basemap .. _sympy: https://www.sympy.org/ .. _mpi4py: https://pythonhosted.org/mpi4py/ diff --git a/ilamb/ilamb/setup.py b/ilamb/ilamb/setup.py index e36e7ff2..b189267f 100644 --- a/ilamb/ilamb/setup.py +++ b/ilamb/ilamb/setup.py @@ -4,7 +4,7 @@ import subprocess import os -VERSION = '2.2' +VERSION = '2.3' def git_version(): """ @@ -97,12 +97,12 @@ def write_version_py(filename=os.path.join('src/ILAMB', 'generated_version.py')) keywords=['benchmarking','earth system modeling','climate modeling','model intercomparison'], packages=['ILAMB'], package_dir={'ILAMB' : 'src/ILAMB'}, - scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor'], - install_requires=['numpy>=1.9.2', + scripts=['bin/ilamb-run','bin/ilamb-fetch','bin/ilamb-mean','bin/ilamb-doctor','bin/ilamb-table'], + install_requires=['numpy>=1.11.0', 'matplotlib>=1.4.3', #'basemap>=1.0.7', # basemap is in pypi but broken, need to manually install - 'netCDF4>=1.1.4,<=1.2.4', # upper limit is for cfunits - 'cfunits>=1.1.4', + 'netCDF4>=1.1.4', + 'cf_units>=2.0.0', 'sympy>=0.7.6', 'mpi4py>=1.3.1', 'scipy>=0.9.0'] diff --git a/ilamb/ilamb/src/ILAMB/ConfPermafrost.py b/ilamb/ilamb/src/ILAMB/ConfPermafrost.py new file mode 100644 index 00000000..92c2ead4 --- /dev/null +++ b/ilamb/ilamb/src/ILAMB/ConfPermafrost.py @@ -0,0 +1,223 @@ +from Confrontation import Confrontation +from mpl_toolkits.basemap import Basemap +from Variable import Variable +from Post import ColorBar +import matplotlib.pyplot as plt +from netCDF4 import Dataset +import ilamblib as il +import numpy as np + +class ConfPermafrost(Confrontation): + + def __init__(self,**keywords): + + # Ugly, but this is how we call the Confrontation constructor + super(ConfPermafrost,self).__init__(**keywords) + + # Now we overwrite some things which are different here + self.layout + self.regions = ["global"] + self.layout.regions = self.regions + self.weight = { "Obs Score" : 1., + "Mod Score" : 1. 
} + for page in self.layout.pages: + page.setMetricPriority(["Total Area" , + "Overlap Area", + "Missed Area" , + "Excess Area" , + "Obs Score" , + "Mod Score" , + "Overall Score"]) + + def stageData(self,m): + + obs = Variable(filename = self.source, + variable_name = "permafrost_extent") + + # These parameters may be changed from the configure file + y0 = float(self.keywords.get("y0" ,1970.)) # [yr] beginning year to include in analysis + yf = float(self.keywords.get("yf" ,2000.)) # [yr] end year to include in analysis + dmax = float(self.keywords.get("dmax",3.5)) # [m] consider layers where depth in is the range [0,dmax] + Teps = float(self.keywords.get("Teps",273.15)) # [K] temperature below which we assume permafrost occurs + + t0 = (y0 -1850.)*365. + tf = (yf+1-1850.)*365. + mod = m.extractTimeSeries(self.variable, + initial_time = t0, + final_time = tf) + mod.trim(t = [t0 ,tf ], + lat = [obs.lat.min(),90 ], + d = [0 ,dmax]) + mod = mod.annualCycle() + Tmax = mod.data.max(axis=0) + table = np.zeros(Tmax.shape[-2:]) + table[...] = np.NAN + thaw = np.zeros(table.shape,dtype=bool) + for i in range(mod.depth_bnds.shape[0]-1,-1,-1): + thaw += (Tmax[i]>=Teps) + frozen = np.where((Tmax[i] 60: fsize = 10 - ax.set_ylabel(ylabel,fontsize=fsize) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() - - # add the figure to the HTML layout - if name == "Benchmark" and region == "global": - short_name = short_name.replace("global_","") - page.addFigure(c.longname, - "benchmark_" + short_name, - "Benchmark_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, - benchmark = False) - - # determine the 1D relationship curves - bins = np.linspace(ind_min,ind_max,nbin+1) - delta = 0.1*(bins[1]-bins[0]) - inds = np.digitize(x,bins) - ids = np.unique(inds).clip(1,bins.size-1) - xb = [] - yb = [] - eb = [] - for i in ids: - yt = y[inds==i] - xi = 0.5 - xb.append(xi*bins[i-1]+(1.-xi)*bins[i]) - yb.append(yt.mean()) - try: - eb.append(yt.std()) # for some reason this fails sometimes - except: - eb.append(np.sqrt(((yt-yb[-1])**2).sum()/float(yt.size))) - - if name == "Benchmark": - obs_x = np.asarray(xb) - obs_y = np.asarray(yb) - obs_e = np.asarray(eb) - else: - mod_x = np.asarray(xb) - mod_y = np.asarray(yb) - mod_e = np.asarray(eb) - - # compute and plot the difference - O = np.array(obs_dist.data) - M = np.array(mod_dist.data) - O[np.where(obs_dist.mask)] = 0. - M[np.where(mod_dist.mask)] = 0. 
- dif_dist = np.ma.masked_array(M-O,mask=obs_dist.mask*mod_dist.mask) - lim = np.abs(dif_dist).max() - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - pc = ax.pcolormesh(xedges, - yedges, - dif_dist, - cmap = "Spectral_r", - vmin = -lim, - vmax = +lim) - div = make_axes_locatable(ax) - fig.colorbar(pc,cax=div.append_axes("right",size="5%",pad=0.05), - orientation="vertical", - label="Distribution Difference") - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_diff_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() + with Dataset(os.path.join(self.output_path,"%s_%s.nc" % (self.name,m.name)),mode="r+") as results: + + # Grab/create a relationship and scalars group + group = None + if "Relationships" not in results.groups: + group = results.createGroup("Relationships") + else: + group = results.groups["Relationships"] + if "scalars" not in group.groups: + scalars = group.createGroup("scalars") + else: + scalars = group.groups["scalars"] + + # for each relationship... + for c in self.relationships: + + # try to get the independent data from the model and obs + try: + ref_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,"Benchmark"))) + com_ind = _retrieveData(os.path.join(c.output_path,"%s_%s.nc" % (c.name,m.name ))) + ind_name = c.longname.split("/")[0] + ind_min = c.limits["timeint"]["min"]-1e-12 + ind_max = c.limits["timeint"]["max"]+1e-12 + except: + continue + + # Add figures to the html page page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "benchmark_rel_%s" % ind_name, + "Benchmark_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) - - # score the distributions = 1 - Hellinger distance - score = 1.-np.sqrt(((np.sqrt(obs_dist)-np.sqrt(mod_dist))**2).sum())/np.sqrt(2) - vname = '%s Score %s' % (c.longname.split('/')[0],region) - #if vname in scalars.variables: - # scalars.variables[vname][0] = score - #else: - # Variable(name = vname, - # unit = "1", - # data = score).toNetCDF4(results,group="Relationships") - - # plot the 1D curve - fig,ax = plt.subplots(figsize=(6,5.25),tight_layout=True) - ax.errorbar(obs_x-delta,obs_y,yerr=obs_e,fmt='-o',color='k') - ax.errorbar(mod_x+delta,mod_y,yerr=mod_e,fmt='-o',color=m.color) - ax.set_xlabel("%s, %s" % ( c.longname.split("/")[0],post.UnitStringToMatplotlib(obs_ind.unit))) - ax.set_ylabel("%s, %s" % (self.longname.split("/")[0],post.UnitStringToMatplotlib(obs_dep.unit))) - ax.set_xlim(ind_min,ind_max) - ax.set_ylim(dep_min,dep_max) - short_name = "rel_func_%s" % ind_name - fig.savefig(os.path.join(self.output_path,"%s_%s_%s.png" % (name,region,short_name))) - plt.close() page.addFigure(c.longname, - short_name, - "MNAME_RNAME_%s.png" % (short_name), - legend = False, + "rel_%s" % ind_name, + "MNAME_RNAME_rel_%s.png" % ind_name, + legend = False, benchmark = False) + page.addFigure(c.longname, + "rel_diff_%s" % ind_name, + "MNAME_RNAME_rel_diff_%s.png" % ind_name, + legend = False, + benchmark = False) + page.addFigure(c.longname, + "rel_func_%s" % ind_name, + "MNAME_RNAME_rel_func_%s.png" % ind_name, + legend = False, + benchmark = False) + + # Analysis over regions + lim_dep = [dep_min,dep_max] + lim_ind = [ind_min,ind_max] + longname = 
c.longname.split('/')[0] + for region in self.regions: + ref_dist = _buildDistributionResponse(ref_ind,ref_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + com_dist = _buildDistributionResponse(com_ind,com_dep,ind_lim=lim_ind,dep_lim=lim_dep,region=region) + + # Make the plots + _plotDistribution(ref_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name, c.name,post.UnitStringToMatplotlib(ref_ind.unit)), + "%s/%s, %s" % (dep_name,self.name,post.UnitStringToMatplotlib(ref_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % ("Benchmark",region,ind_name))) + _plotDistribution(com_dist[0],com_dist[1],com_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_%s.png" % (m.name,region,ind_name))) + _plotDifference (ref_dist[0],com_dist[0],ref_dist[1],ref_dist[2], + "%s/%s, %s" % (ind_name,m.name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s/%s, %s" % (dep_name,m.name,post.UnitStringToMatplotlib(com_dep.unit)), + os.path.join(self.output_path,"%s_%s_rel_diff_%s.png" % (m.name,region,ind_name))) + _plotFunction (ref_dist[3],ref_dist[4],com_dist[3],com_dist[4],ref_dist[1],ref_dist[2], + "%s, %s" % (ind_name,post.UnitStringToMatplotlib(com_ind.unit)), + "%s, %s" % (dep_name,post.UnitStringToMatplotlib(com_dep.unit)), + m.color, + os.path.join(self.output_path,"%s_%s_rel_func_%s.png" % (m.name,region,ind_name))) + + # Score the distribution + score = _scoreDistribution(ref_dist[0],com_dist[0]) + sname = "%s Hellinger Distance %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + + # Score the functional response + score = _scoreFunction(ref_dist[3],com_dist[3]) + sname = "%s RMSE Score %s" % (longname,region) + if sname in scalars.variables: + scalars.variables[sname][0] = score + else: + Variable(name = sname, + unit = "1", + data = score).toNetCDF4(results,group="Relationships") + - # score the relationship - i0,i1 = np.where(np.abs(obs_x[:,np.newaxis]-mod_x)<1e-12) - obs_y = obs_y[i0]; mod_y = mod_y[i1] - isnan = np.isnan(obs_y)*np.isnan(mod_y) - obs_y[isnan] = 0.; mod_y[isnan] = 0. 
- score = np.exp(-np.linalg.norm(obs_y-mod_y)/np.linalg.norm(obs_y)) - vname = '%s RMSE Score %s' % (c.longname.split('/')[0],region) - if vname in scalars.variables: - scalars.variables[vname][0] = score - else: - Variable(name = vname, - unit = "1", - data = score).toNetCDF4(results,group="Relationships") - - results.close() + class FileContextManager(): diff --git a/ilamb/ilamb/src/ILAMB/ModelResult.py b/ilamb/ilamb/src/ILAMB/ModelResult.py index eff17ebf..6980d8b8 100644 --- a/ilamb/ilamb/src/ILAMB/ModelResult.py +++ b/ilamb/ilamb/src/ILAMB/ModelResult.py @@ -266,7 +266,6 @@ def derivedVariable(self,variable_name,expression,lats=None,lons=None,initial_ti """ from sympy import sympify - from cfunits import Units if expression is None: raise il.VarNotInModel() args = {} units = {} diff --git a/ilamb/ilamb/src/ILAMB/Post.py b/ilamb/ilamb/src/ILAMB/Post.py index 502f9f12..809b154d 100644 --- a/ilamb/ilamb/src/ILAMB/Post.py +++ b/ilamb/ilamb/src/ILAMB/Post.py @@ -223,11 +223,19 @@ def __init__(self,name,title): def __str__(self): r = Regions() - def _sortFigures(figure,priority=["benchmark_timeint","timeint","timeintremap","bias","rmse","benchmark_phase","phase","shift","biasscore","rmsescore","shiftscore","spatial_variance","legend_spatial_variance","spaceint","accumulate","cycle","dtcycle","compcycle","temporal_variance"]): + def _sortFigures(figure): + macro = ["timeint","bias","rmse","iav","phase","shift","variance","spaceint","accumulate","cycle"] val = 1. - for i,pname in enumerate(priority): - if pname == figure.name: val += 2**i - return val + for i,m in enumerate(macro): + if m in figure.name: val += 3**i + if figure.name.startswith("benchmark"): val -= 1. + if figure.name.endswith("score"): val += 1. + if figure.name.startswith("legend"): + if "variance" in figure.name: + val += 1. + else: + val = 0. + return val code = """
@@ -834,11 +842,14 @@ def head(self): class HtmlLayout(): def __init__(self,pages,cname,years=None): - + self.pages = pages self.cname = cname.replace("/"," / ") if years is not None: - self.cname += " / %d-%d" % (years) + try: + self.cname += " / %d-%d" % (years) + except: + pass for page in self.pages: page.pages = self.pages page.cname = self.cname @@ -1048,7 +1059,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa nvariables = len(variables) maxV = max([len(v) for v in variables]) maxM = max([len(m) for m in models]) - wpchar = 0.1 + wpchar = 0.15 wpcell = 0.19 hpcell = 0.25 w = maxV*wpchar + max(4,nmodels)*wpcell @@ -1085,6 +1096,8 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[0].set_yticklabels(variables[::-1]) ax[0].tick_params('both',length=0,width=0,which='major') ax[0].tick_params(axis='y',pad=10) + ax[0].set_xlim(0,nmodels) + ax[0].set_ylim(0,nvariables) if vcolor is not None: for i,t in enumerate(ax[0].yaxis.get_ticklabels()): t.set_backgroundcolor(vcolor[::-1][i]) @@ -1117,6 +1130,7 @@ def BenchmarkSummaryFigure(models,variables,data,figname,vcolor=None,rel_only=Fa ax[i].set_xticklabels(models,rotation=90) ax[i].tick_params('both',length=0,width=0,which='major') ax[i].set_yticks([]) + ax[i].set_xlim(0,nmodels) ax[i].set_ylim(0,nvariables) if rel_only: ax[i].set_yticks (np.arange(nvariables)+0.5) diff --git a/ilamb/ilamb/src/ILAMB/Scoreboard.py b/ilamb/ilamb/src/ILAMB/Scoreboard.py index ba0b2cfb..5fceb929 100644 --- a/ilamb/ilamb/src/ILAMB/Scoreboard.py +++ b/ilamb/ilamb/src/ILAMB/Scoreboard.py @@ -5,6 +5,7 @@ from ConfEvapFraction import ConfEvapFraction from ConfIOMB import ConfIOMB from ConfDiurnal import ConfDiurnal +from ConfPermafrost import ConfPermafrost import os,re from netCDF4 import Dataset import numpy as np @@ -179,17 +180,19 @@ def ParseScoreboardConfigureFile(filename): "ConfRunoff" : ConfRunoff, "ConfEvapFraction": ConfEvapFraction, "ConfIOMB" : ConfIOMB, - "ConfDiurnal" : ConfDiurnal} + "ConfDiurnal" : ConfDiurnal, + "ConfPermafrost" : ConfPermafrost} class Scoreboard(): """ A class for managing confrontations """ - def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None): + def __init__(self,filename,regions=["global"],verbose=False,master=True,build_dir="./_build",extents=None,rel_only=False): if not os.environ.has_key('ILAMB_ROOT'): raise ValueError("You must set the environment variable 'ILAMB_ROOT'") self.build_dir = build_dir + self.rel_only = rel_only if (master and not os.path.isdir(self.build_dir)): os.mkdir(self.build_dir) @@ -272,7 +275,7 @@ def createHtml(self,M,filename="index.html"): has_rel = np.asarray([len(rel.children) for rel in rel_tree.children]).sum() > 0 nav = "" if has_rel: - GenerateRelSummaryFigure(self,M,"%s/overview_rel.png" % self.build_dir) + GenerateRelSummaryFigure(rel_tree,M,"%s/overview_rel.png" % self.build_dir,rel_only=self.rel_only) nav = """
  • Relationship
  • """ #global global_print_node_string @@ -453,7 +456,7 @@ def createBarCharts(self,M): html = GenerateBarCharts(self.tree,M) def createSummaryFigure(self,M): - GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir) + GenerateSummaryFigure(self.tree,M,"%s/overview.png" % self.build_dir,rel_only=self.rel_only) def dumpScores(self,M,filename): out = file("%s/%s" % (self.build_dir,filename),"w") @@ -582,7 +585,7 @@ def GenerateTable(tree,M,S,composite=True): BuildHTMLTable(tree,M,S.build_dir) return global_html -def GenerateSummaryFigure(tree,M,filename): +def GenerateSummaryFigure(tree,M,filename,rel_only=False): models = [m.name for m in M] variables = [] @@ -602,96 +605,34 @@ def GenerateSummaryFigure(tree,M,filename): else: data[row,:] = var.score - BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors) - -def GenerateRelSummaryFigure(S,M,figname): - - def _parse(node): - global score,count,rows - if node.level != 5: return - row = "%s vs. %s" % (node.parent.parent.parent.name,node.parent.name) - col = node.name - if row not in rows: rows.append(row) - if not score .has_key(row): score[row] = {} - if not count .has_key(row): count[row] = {} - if not score[row].has_key(col): score[row][col] = 0. - if not count[row].has_key(col): count[row][col] = 0. - score[row][col] += node.score - count[row][col] += 1. - - class rnode(): - def __init__(self,name,level): - self.name = name - self.level = level - self.parent = None - self.score = None - self.children = [] - - - root = S.build_dir - tree = rnode("root",0) - previous_node = tree - current_level = 0 - - for subdir, dirs, files in os.walk(root): - if subdir == root: continue - flat = subdir.replace(root,"").lstrip("/").split("/") - level = len(flat) - name = flat[-1] - child = rnode(name,level) - if level == current_level: - child.parent = previous_node.parent - previous_node.parent.children.append(child) - if level == 3: - for fname in [f for f in files if f.endswith(".nc") and "Benchmark" not in f]: - with Dataset(os.path.join(subdir,fname)) as dset: - if "Relationships" not in dset.groups: continue - grp = dset.groups["Relationships"]["scalars"] - model = dset.name - for var in [var for var in grp.variables.keys() if ("Overall" not in var and - "global" in var)]: - rname = var.split(" ")[0] - hadrel = False - for c in child.children: - if c.name == rname: - rel = c - hadrel = True - if not hadrel: rel = rnode(rname,level+1) - mod = rnode(model,level+2) - mod.score = grp.variables[var][...] 
- mod.parent = rel - rel.children.append(mod) - rel.parent = child - if not hadrel: child.children.append(rel) - elif level > current_level: - child.parent = previous_node - previous_node.children.append(child) - current_level = level - else: - addto = tree - for i in range(level-1): addto = addto.children[-1] - child.parent = addto - addto.children.append(child) - current_level = level - previous_node = child - - global score,count,rows - score = {} - count = {} - rows = [] - TraversePreorder(tree,_parse) - models = [] - for row in rows: - for key in score[row].keys(): - if key not in models: models.append(key) - data = np.zeros((len(rows),len(models))) + BenchmarkSummaryFigure(models,variables,data,filename,vcolor=vcolors,rel_only=rel_only) + +def GenerateRelSummaryFigure(S,M,figname,rel_only=False): + + # reorganize the relationship data + scores = {} + counts = {} + rows = [] + vcolors = [] + for h1 in S.children: + for dep in h1.children: + dname = dep.name.split("/")[0] + for ind in dep.children: + iname = ind.name.split("/")[0] + key = "%s/%s" % (dname,iname) + if scores.has_key(key): + scores[key] += ind.score + counts[key] += 1. + else: + scores[key] = np.copy(ind.score) + counts[key] = 1. + rows .append(key) + vcolors.append(h1.bgcolor) + if len(rows) == 0: return + data = np.ma.zeros((len(rows),len(M))) for i,row in enumerate(rows): - for j,col in enumerate(models): - try: - data[i,j] = score[row][col] / count[row][col] - except: - data[i,j] = np.nan - BenchmarkSummaryFigure(models,rows,data,figname,rel_only=False) + data[i,:] = scores[row] / counts[row] + BenchmarkSummaryFigure([m.name for m in M],rows,data,figname,rel_only=rel_only,vcolor=vcolors) def GenerateRelationshipTree(S,M): @@ -753,7 +694,6 @@ def GenerateRelationshipTree(S,M): if "Overall Score global" not in grp.variables.keys(): continue h2.score[i] = grp.variables["Overall Score global"][...] - return rel_tree diff --git a/ilamb/ilamb/src/ILAMB/Variable.py b/ilamb/ilamb/src/ILAMB/Variable.py index 7d1f48b8..fcfa33f9 100644 --- a/ilamb/ilamb/src/ILAMB/Variable.py +++ b/ilamb/ilamb/src/ILAMB/Variable.py @@ -3,7 +3,7 @@ from mpl_toolkits.basemap import Basemap import matplotlib.colors as colors from pylab import get_cmap -from cfunits import Units +from cf_units import Unit import ilamblib as il import Post as post import numpy as np @@ -220,6 +220,17 @@ def __str__(self): return s + def nbytes(self): + r"""Estimate the memory usage of a variable in bytes. + """ + nbytes = 0. + for key in self.__dict__.keys(): + try: + nbytes += self.__dict__[key].nbytes + except: + pass + return nbytes + def integrateInTime(self,**keywords): r"""Integrates the variable over a given time period. 
@@ -286,7 +297,7 @@ def integrateInTime(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_time" if mean: @@ -300,18 +311,18 @@ def integrateInTime(self,**keywords): else: dt = dt.sum(axis=0) np.seterr(over='ignore',under='ignore') - integral /= dt + integral = integral / dt np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("d")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("d")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, lat = self.lat, lat_bnds = self.lat_bnds, @@ -403,7 +414,7 @@ def integrateInDepth(self,**keywords): integral = np.ma.masked_array(integral,mask=mask,copy=False) # handle units - unit = Units(self.unit) + unit = Unit(self.unit) name = self.name + "_integrated_over_depth" if mean: @@ -417,18 +428,18 @@ def integrateInDepth(self,**keywords): else: dz = dz.sum(axis=axis) np.seterr(over='ignore',under='ignore') - integral /= dz + integral = integral / dz np.seterr(over='raise' ,under='raise' ) else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = integral, - unit = unit.units, + unit = "%s" % unit, name = name, time = self.time, time_bnds = self.time_bnds, @@ -521,13 +532,13 @@ def _integrate(var,areas): integral = _integrate(self.data,measure) if mean: np.seterr(under='ignore') - integral /= measure.sum() + integral = integral / measure.sum() np.seterr(under='raise') # handle the name and unit name = self.name + "_integrated_over_space" if region is not None: name = name.replace("space",region) - unit = Units(self.unit) + unit = Unit(self.unit) if mean: # we have already divided thru by the non-masked area in @@ -536,12 +547,12 @@ def _integrate(var,areas): else: # if not a mean, we need to potentially handle unit conversions - unit0 = Units("m2")*unit - unit = Units(unit0.formatted().split()[-1]) - integral = Units.conform(integral,unit0,unit) + unit0 = Unit("m2")*unit + unit = Unit(unit0.format().split()[-1]) + integral = unit0.convert(integral,unit) return Variable(data = np.ma.masked_array(integral), - unit = unit.units, + unit = "%s" % unit, time = self.time, time_bnds = self.time_bnds, depth = self.depth, @@ -710,7 +721,7 @@ def _make_bnds(x): bnds[0] = max(x[0] -0.5*(x[ 1]-x[ 0]),-180) bnds[-1] = min(x[-1]+0.5*(x[-1]-x[-2]),+180) return bnds - assert Units(var.unit) == Units(self.unit) + assert Unit(var.unit) == Unit(self.unit) assert self.temporal == False assert self.ndata == var.ndata assert self.layered == False @@ -752,7 +763,7 @@ def _make_bnds(x): def convert(self,unit,density=998.2): """Convert the variable to a given unit. - We use the UDUNITS library via the cfunits python interface to + We use the UDUNITS library via the cf_units python interface to convert the variable's unit. Additional support is provided for unit conversions in which substance information is required. 
For example, in quantities such as precipitation it @@ -777,53 +788,53 @@ def convert(self,unit,density=998.2): this object with its unit converted """ - src_unit = Units(self.unit) - tar_unit = Units( unit) + if unit is None: return self + src_unit = Unit(self.unit) + tar_unit = Unit( unit) mask = self.data.mask # Define some generic quantities - linear = Units("m") - linear_rate = Units("m s-1") - area_density = Units("kg m-2") - area_density_rate = Units("kg m-2 s-1") - mass_density = Units("kg m-3") - volume_conc = Units("mol m-3") - mass_conc = Units("mol kg-1") - - # cfunits doesn't handle frequently found temperature expressions + linear = Unit("m") + linear_rate = Unit("m s-1") + area_density = Unit("kg m-2") + area_density_rate = Unit("kg m-2 s-1") + mass_density = Unit("kg m-3") + volume_conc = Unit("mol m-3") + mass_conc = Unit("mol kg-1") + + # UDUNITS doesn't handle frequently found temperature expressions synonyms = {"K":"degK", "R":"degR", "C":"degC", "F":"degF"} for syn in synonyms.keys(): - if src_unit.units == syn: src_unit = Units(synonyms[syn]) - if tar_unit.units == syn: tar_unit = Units(synonyms[syn]) + if src_unit.format() == syn: src_unit = Unit(synonyms[syn]) + if tar_unit.format() == syn: tar_unit = Unit(synonyms[syn]) # Do we need to multiply by density? - if ( (src_unit.equivalent(linear_rate) and tar_unit.equivalent(area_density_rate)) or - (src_unit.equivalent(linear ) and tar_unit.equivalent(area_density )) or - (src_unit.equivalent(mass_conc ) and tar_unit.equivalent(volume_conc )) ): + if ( (src_unit.is_convertible(linear_rate) and tar_unit.is_convertible(area_density_rate)) or + (src_unit.is_convertible(linear ) and tar_unit.is_convertible(area_density )) or + (src_unit.is_convertible(mass_conc ) and tar_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') self.data *= density np.seterr(over='raise',under='raise') src_unit *= mass_density # Do we need to divide by density? 
- if ( (tar_unit.equivalent(linear_rate) and src_unit.equivalent(area_density_rate)) or - (tar_unit.equivalent(linear ) and src_unit.equivalent(area_density )) or - (tar_unit.equivalent(mass_conc ) and src_unit.equivalent(volume_conc )) ): + if ( (tar_unit.is_convertible(linear_rate) and src_unit.is_convertible(area_density_rate)) or + (tar_unit.is_convertible(linear ) and src_unit.is_convertible(area_density )) or + (tar_unit.is_convertible(mass_conc ) and src_unit.is_convertible(volume_conc )) ): np.seterr(over='ignore',under='ignore') - self.data /= density + self.data = self.data / density np.seterr(over='raise',under='raise') - src_unit /= mass_density + src_unit = src_unit / mass_density # Convert units try: - self.data = Units.conform(self.data,src_unit,tar_unit) + self.data = src_unit.convert(self.data,tar_unit) self.data = np.ma.masked_array(self.data,mask=mask) self.unit = unit except: - print "var_name = %s, src_unit = %s, target_unit = %s " % (self.name,src_unit,tar_unit) raise il.UnitConversionError() return self @@ -1599,7 +1610,7 @@ def spatialDistribution(self,var,region="global"): R0 = 1.0 std0 = std0.clip(1e-12) std = std .clip(1e-12) - std /= std0 + std = std/std0 score = 4.0*(1.0+R.data)/((std+1.0/std)**2 *(1.0+R0)) except: std = np.asarray([0.0]) diff --git a/ilamb/ilamb/src/ILAMB/__init__.py b/ilamb/ilamb/src/ILAMB/__init__.py index f8ea82b9..3bb5124a 100644 --- a/ilamb/ilamb/src/ILAMB/__init__.py +++ b/ilamb/ilamb/src/ILAMB/__init__.py @@ -1,6 +1,6 @@ __author__ = 'Nathan Collier' -__date__ = 'Nov 2017' -__version__ = '2.2' +__date__ = 'Jun 2018' +__version__ = '2.3' from distutils.version import LooseVersion import platform @@ -10,7 +10,7 @@ "numpy" : "1.9.2", "matplotlib" : "1.4.3", "netCDF4" : "1.1.4", - "cfunits" : "1.1.4", + "cf_units" : "2.0.0", "mpl_toolkits.basemap" : "1.0.7", "sympy" : "0.7.6", "mpi4py" : "1.3.1" diff --git a/ilamb/ilamb/src/ILAMB/constants.py b/ilamb/ilamb/src/ILAMB/constants.py index 714209d8..8d77b8f6 100644 --- a/ilamb/ilamb/src/ILAMB/constants.py +++ b/ilamb/ilamb/src/ILAMB/constants.py @@ -130,6 +130,28 @@ "sidelbl" :"RMSE SCORE", "haslegend" :True } +space_opts["iav"] = { "name" :"Interannual variability", + "cmap" :"Reds", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iav.png", + "sidelbl" :"MODEL INTERANNUAL VARIABILITY", + "haslegend" :True } + +space_opts["iavscore"] = { "name" :"Interannual variability score", + "cmap" :"RdYlGn", + "sym" :False, + "ticks" :None, + "ticklabels":None, + "label" :"unit" , + "section" :"Temporally integrated period mean", + "pattern" :"MNAME_RNAME_iavscore.png", + "sidelbl" :"INTERANNUAL VARIABILITY SCORE", + "haslegend" :True } + space_opts["shift"] = { "name" :"Temporally integrated mean phase shift", "cmap" :"PRGn", "sym" :True, diff --git a/ilamb/ilamb/src/ILAMB/ilamblib.py b/ilamb/ilamb/src/ILAMB/ilamblib.py index 1304be6a..b7cff096 100644 --- a/ilamb/ilamb/src/ILAMB/ilamblib.py +++ b/ilamb/ilamb/src/ILAMB/ilamblib.py @@ -3,11 +3,11 @@ from Regions import Regions from netCDF4 import Dataset,num2date,date2num from datetime import datetime -from cfunits import Units +from cf_units import Unit from copy import deepcopy from mpi4py import MPI import numpy as np -import logging +import logging,re logger = logging.getLogger("%i" % MPI.COMM_WORLD.rank) @@ -53,7 +53,30 @@ def __str__(self): return "NotLayeredVariable" class NotDatasiteVariable(Exception): def __str__(self): return 
"NotDatasiteVariable" +def FixDumbUnits(unit): + r"""Try to fix the dumb units people insist on using. + Parameters + ---------- + unit : str + the trial unit + + Returns + ------- + unit : str + the fixed unit + """ + # Various synonyms for 1 + if unit.lower().strip() in ["unitless", + "n/a", + "none"]: unit = "1" + # Remove the C which so often is used to mean carbon but actually means coulomb + tokens = re.findall(r"[\w']+", unit) + for token in tokens: + if token.endswith("C") and Unit(token[:-1]).is_convertible(Unit("g")): + unit = unit.replace(token,token[:-1]) + return unit + def GenerateDistinctColors(N,saturation=0.67,value=0.67): r"""Generates a series of distinct colors. @@ -86,7 +109,7 @@ def ConvertCalendar(t,tbnd=None): This routine converts the representation of time to the ILAMB default: days since 1850-1-1 00:00:00 on a 365-day calendar. This is so we can make comparisons with data from other models and - benchmarks. We use cfunits time conversion capability. + benchmarks. Parameters ---------- @@ -343,71 +366,51 @@ def SympifyWithArgsUnits(expression,args,units): """ from sympy import sympify,postorder_traversal - # The traversal needs that we make units commensurate when - # possible - keys = args.keys() - for i in range(len(keys)): - ikey = keys[i] - for j in range(i+1,len(keys)): - jkey = keys[j] - if Units(units[jkey]).equivalent(Units(units[ikey])): - args [jkey] = Units.conform(args[jkey], - Units(units[jkey]), - Units(units[ikey]), - inplace=True) - units[jkey] = units[ikey] - - # We need to do what sympify does but also with unit - # conversions. So we traverse the expression tree in post order - # and take actions based on the kind of operation being performed. expression = sympify(expression) + + # try to convert all arguments to same units if possible, it + # catches most use cases + keys = args.keys() + for i,key0 in enumerate(keys): + for key in keys[(i+1):]: + try: + Unit(units[key]).convert(args[key],Unit(units[key0]),inplace=True) + units[key] = units[key0] + except: + pass + for expr in postorder_traversal(expression): - - if expr.is_Atom: continue - ekey = str(expr) # expression key - + ekey = str(expr) if expr.is_Add: - # Addition will require that all args should be the same - # unit. As a convention, we will try to conform all units - # to the first variable's units. - key0 = None - for arg in expr.args: - key = str(arg) - if not args.has_key(key): continue - if key0 is None: - key0 = key - else: - # Conform these units to the units of the first arg - Units.conform(args[key], - Units(units[key]), - Units(units[key0]), - inplace=True) - units[key] = units[key0] - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = units[key0] + # if there are scalars in the expression, these will not + # be in the units dictionary. Add them and give them an + # implicit unit of 1 + keys = [str(arg) for arg in expr.args] + for key in keys: + if not units.has_key(key): units[key] = "1" - elif expr.is_Pow: + # if we are adding, all arguments must have the same unit. 
+ key0 = keys[0] + for key in keys: + Unit(units[key]).convert(np.ones(1),Unit(units[key0])) + units[key] = units[key0] + units[ekey] = "%s" % (units[key0]) - assert len(expr.args) == 2 # check on an assumption - power = float(expr.args[1]) - args [ekey] = args[str(expr.args[0])]**power - units[ekey] = Units(units[str(expr.args[0])]) - units[ekey] = units[ekey]**power - - elif expr.is_Mul: + elif expr.is_Pow: - unit = Units("1") - for arg in expr.args: - key = str(arg) - if units.has_key(key): unit *= Units(units[key]) - - args [ekey] = sympify(str(expr),locals=args) - units[ekey] = Units(unit).formatted() + # if raising to a power, just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = "(%s)%s" % (units[keys[0]],keys[1]) - return args[ekey],units[ekey] + elif expr.is_Mul: + + # just create the new unit + keys = [str(arg) for arg in expr.args] + units[ekey] = " ".join(["(%s)" % units[key] for key in keys if units.has_key(key)]) + return sympify(str(expression),locals=args),units[ekey] + def ComputeIndexingArrays(lat2d,lon2d,lat,lon): """Blah. @@ -635,11 +638,11 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if depth_bnd_name is not None: depth_bnd = grp.variables[depth_bnd_name][...] if dunit is not None: - if not Units(dunit).equivalent(Units("m")): + if not Unit(dunit).is_convertible(Unit("m")): raise ValueError("Non-linear units [%s] of the layered dimension [%s] in %s" % (dunit,depth_name,filename)) - depth = Units.conform(depth,Units(dunit),Units("m"),inplace=True) + depth = Unit(dunit).convert(depth,Unit("m"),inplace=True) if depth_bnd is not None: - depth_bnd = Units.conform(depth_bnd,Units(dunit),Units("m"),inplace=True) + depth_bnd = Unit(dunit).convert(depth_bnd,Unit("m"),inplace=True) if data_name is not None: data = len(grp.dimensions[data_name]) @@ -701,16 +704,15 @@ def FromNetCDF4(filename,variable_name,alternate_vars=[],t0=None,tf=None,group=N if "missing_value" in var.ncattrs(): mask += (np.abs(v-var.missing_value)<1e-12) v = np.ma.masked_array(v,mask=mask,copy=False) - # handle units problems that cfunits doesn't if "units" in var.ncattrs(): - units = var.units.replace("unitless","1") + units = FixDumbUnits(var.units) else: units = "1" dset.close() return v,units,variable_name,t,t_bnd,lat,lat_bnd,lon,lon_bnd,depth,depth_bnd,cbounds,data -def Score(var,normalizer,FC=0.999999): +def Score(var,normalizer): """Remaps a normalized variable to the interval [0,1]. Parameters @@ -726,16 +728,7 @@ def Score(var,normalizer,FC=0.999999): name = name.replace("rmse","rmse_score") name = name.replace("iav" ,"iav_score") np.seterr(over='ignore',under='ignore') - - data = None - if "bias" in var.name or "diff" in var.name: - deno = np.ma.copy(normalizer.data) - if (deno.size - deno.mask.sum()) > 1: deno -= deno.min()*FC - data = np.exp(-np.abs(var.data/deno)) - elif "rmse" in var.name: - data = np.exp(-var.data/normalizer.data) - elif "iav" in var.name: - data = np.exp(-np.abs(var.data/normalizer.data)) + data = np.exp(-np.abs(var.data/normalizer.data)) data[data<1e-16] = 0. np.seterr(over='raise',under='raise') return Variable(name = name, @@ -810,7 +803,7 @@ def _composeGrids(v1,v2): lon = lon_bnds.mean(axis=1) return lat,lon,lat_bnds,lon_bnds -def AnalysisMeanState(ref,com,**keywords): +def AnalysisMeanStateSites(ref,com,**keywords): """Perform a mean state analysis. 
This mean state analysis examines the model mean state in space @@ -848,6 +841,7 @@ def AnalysisMeanState(ref,com,**keywords): the unit to use when displaying output on plots on the HTML page """ + from Variable import Variable regions = keywords.get("regions" ,["global"]) dataset = keywords.get("dataset" ,None) @@ -860,14 +854,14 @@ def AnalysisMeanState(ref,com,**keywords): skip_iav = keywords.get("skip_iav" ,False) skip_cycle = keywords.get("skip_cycle" ,False) ILAMBregions = Regions() - spatial = ref.spatial + spatial = False normalizer = None # Only study the annual cycle if it makes sense if not ref.monthly: skip_cycle = True if ref.time.size < 12: skip_cycle = True - - # We find + if skip_rmse : skip_iav = True + if spatial: lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) @@ -926,14 +920,58 @@ def AnalysisMeanState(ref,com,**keywords): # Compute the bias, RMSE, and RMS maps using the interpolated # quantities bias = REF_timeint.bias(COM_timeint) - bias_score_map = Score(bias,REF_timeint) + cREF = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata) + crms = cREF.rms () + bias_score_map = Score(bias,crms) if spatial: bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask if not skip_rmse: - rmse = REF.rmse(COM) - rms = REF.rms () - rmse_score_map = Score(rmse,rms) - + cCOM = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata) + rmse = REF.rmse( COM) + crmse = cREF.rmse(cCOM) + rmse_score_map = Score(crmse,crms) + if not skip_iav: + ref_iav = Variable(name = "centralized %s" % ref.name, unit = ref.unit, + data = np.ma.masked_array(ref.data-ref_timeint.data[np.newaxis,...],mask=ref.data.mask), + time = ref.time, time_bnds = ref.time_bnds, + lat = ref.lat , lat_bnds = ref.lat_bnds, + lon = ref.lon , lon_bnds = ref.lon_bnds, + area = ref.area, ndata = ref.ndata).rms() + com_iav = Variable(name = "centralized %s" % com.name, unit = com.unit, + data = np.ma.masked_array(com.data-com_timeint.data[np.newaxis,...],mask=com.data.mask), + time = com.time, time_bnds = com.time_bnds, + lat = com.lat , lat_bnds = com.lat_bnds, + lon = com.lon , lon_bnds = com.lon_bnds, + area = com.area, ndata = com.ndata).rms() + REF_iav = Variable(name = "centralized %s" % REF.name, unit = REF.unit, + data = np.ma.masked_array(REF.data-REF_timeint.data[np.newaxis,...],mask=REF.data.mask), + time = REF.time, time_bnds = REF.time_bnds, + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata).rms() + COM_iav = Variable(name = "centralized %s" % COM.name, unit = COM.unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = COM.lat , lat_bnds = COM.lat_bnds, + lon = COM.lon , lon_bnds = COM.lon_bnds, + area = COM.area, ndata = COM.ndata).rms() + iav_score_map = Score(Variable(name = "diff %s" % REF.name, 
unit = REF.unit, + data = (COM_iav.data-REF_iav.data), + lat = REF.lat , lat_bnds = REF.lat_bnds, + lon = REF.lon , lon_bnds = REF.lon_bnds, + area = REF.area, ndata = REF.ndata), + REF_iav) + # The phase shift comes from the interpolated quantities if not skip_cycle: ref_cycle = REF.annualCycle() @@ -948,7 +986,7 @@ def AnalysisMeanState(ref,com,**keywords): ref_period_mean = {}; ref_spaceint = {}; ref_mean_cycle = {}; ref_dtcycle = {} com_period_mean = {}; com_spaceint = {}; com_mean_cycle = {}; com_dtcycle = {} bias_val = {}; bias_score = {}; rmse_val = {}; rmse_score = {} - space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {} + space_std = {}; space_cor = {}; sd_score = {}; shift = {}; shift_score = {}; iav_score = {} ref_union_mean = {}; ref_comp_mean = {} com_union_mean = {}; com_comp_mean = {} for region in regions: @@ -975,6 +1013,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .integrateInSpace(region=region,mean=True) rmse_score [region] = rmse_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .integrateInSpace(region=region,mean=True,weight=normalizer) space_std[region],space_cor[region],sd_score[region] = REF_timeint.spatialDistribution(COM_timeint,region=region) else: ref_period_mean[region] = ref_timeint .siteStats(region=region) @@ -995,6 +1035,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region] = rmse .siteStats(region=region) rmse_score [region] = rmse_score_map .siteStats(region=region,weight=normalizer) + if not skip_iav: + iav_score [region] = iav_score_map .siteStats(region=region,weight=normalizer) ref_period_mean[region].name = "Period Mean (original grids) %s" % (region) ref_spaceint [region].name = "spaceint_of_%s_over_%s" % (ref.name,region) @@ -1005,6 +1047,8 @@ def AnalysisMeanState(ref,com,**keywords): if not skip_rmse: rmse_val [region].name = "RMSE %s" % (region) rmse_score [region].name = "RMSE Score %s" % (region) + if not skip_iav: + iav_score [region].name = "Interannual Variability Score %s" % (region) if not skip_cycle: ref_mean_cycle[region].name = "cycle_of_%s_over_%s" % (ref.name,region) ref_dtcycle [region].name = "dtcycle_of_%s_over_%s" % (ref.name,region) @@ -1033,6 +1077,7 @@ def _convert(var,unit): plot_vars = [com_timeint,ref_timeint,bias,com_spaceint,ref_spaceint,bias_val] if not skip_rmse: plot_vars += [rmse,rmse_val] if not skip_cycle: plot_vars += [com_mean_cycle,ref_mean_cycle,com_dtcycle,ref_dtcycle] + if not skip_iav: plot_vars += [com_iav] for var in plot_vars: _convert(var,plot_unit) # Rename and optionally dump out information to netCDF4 files @@ -1064,13 +1109,17 @@ def _convert(var,unit): out_vars.append(shift_score_map) if not skip_rmse: rmse .name = "rmse_map_of_%s" % ref.name - rms .name = "rms_map_of_%s" % ref.name rmse_score_map.name = "rmsescore_map_of_%s" % ref.name out_vars.append(rmse) - out_vars.append(rms ) out_vars.append(rmse_score_map) out_vars.append(rmse_val) out_vars.append(rmse_score) + if not skip_iav: + com_iav.name = "iav_map_of_%s" % ref.name + iav_score_map.name = "iavscore_map_of_%s" % ref.name + out_vars.append(com_iav) + out_vars.append(iav_score_map) + out_vars.append(iav_score) if dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1089,6 +1138,9 @@ def _convert(var,unit): if not skip_cycle: ref_maxt_map.name = "phase_map_of_%s" % ref.name out_vars += [ref_maxt_map,ref_mean_cycle,ref_dtcycle] + if not skip_iav: + 
ref_iav.name = "iav_map_of_%s" % ref.name + out_vars.append(ref_iav) if benchmark_dataset is not None: for var in out_vars: if type(var) == type({}): @@ -1097,124 +1149,303 @@ def _convert(var,unit): var.toNetCDF4(benchmark_dataset,group="MeanState") return - -def AnalysisRelationship(dep_var,ind_var,dataset,rname,**keywords): - """Perform a relationship analysis. - - Expand to provide details of what exactly is done. + +def AnalysisMeanStateSpace(ref,com,**keywords): + """Perform a mean state analysis. + + This mean state analysis examines the model mean state in space + and time. We compute the mean variable value over the time period + at each spatial cell or data site as appropriate, as well as the + bias and RMSE relative to the observational variable. We will + output maps of the period mean values and bias. For each spatial + cell or data site we also estimate the phase of the variable by + finding the mean time of year when the maximum occurs and the + phase shift by computing the difference in phase with respect to + the observational variable. In the spatial dimension, we compute a + spatial mean for each of the desired regions and an average annual + cycle. Parameters ---------- - dep_var : ILAMB.Variable.Variable - the dependent variable - ind_var : ILAMB.Variable.Variable - the independent variable - dataset : netCDF4.Dataset + obs : ILAMB.Variable.Variable + the observational (reference) variable + mod : ILAMB.Variable.Variable + the model (comparison) variable + regions : list of str, optional + the regions overwhich to apply the analysis + dataset : netCDF4.Dataset, optional a open dataset in write mode for caching the results of the analysis which pertain to the model - rname : str - the name of the relationship under study - regions : list of str, optional - a list of units over which to apply the analysis - dep_plot_unit,ind_plot_unit : str, optional - the name of the unit to use in the plots found on the HTML output - - """ - def _extractMaxTemporalOverlap(v1,v2): # should move? - t0 = max(v1.time.min(),v2.time.min()) - tf = min(v1.time.max(),v2.time.max()) - for v in [v1,v2]: - begin = np.argmin(np.abs(v.time-t0)) - end = np.argmin(np.abs(v.time-tf))+1 - v.time = v.time[begin:end] - v.data = v.data[begin:end,...] 
- mask = v1.data.mask + v2.data.mask - v1 = v1.data[mask==0].flatten() - v2 = v2.data[mask==0].flatten() - return v1,v2 - - # grab regions - regions = keywords.get("regions",["global"]) + benchmark_dataset : netCDF4.Dataset, optional + a open dataset in write mode for caching the results of the + analysis which pertain to the observations + space_mean : bool, optional + disable to compute sums of the variable over space instead of + mean values + table_unit : str, optional + the unit to use when displaying output in tables on the HTML page + plots_unit : str, optional + the unit to use when displaying output on plots on the HTML page + + """ + from Variable import Variable + regions = keywords.get("regions" ,["global"]) + dataset = keywords.get("dataset" ,None) + benchmark_dataset = keywords.get("benchmark_dataset",None) + space_mean = keywords.get("space_mean" ,True) + table_unit = keywords.get("table_unit" ,None) + plot_unit = keywords.get("plot_unit" ,None) + mass_weighting = keywords.get("mass_weighting" ,False) + skip_rmse = keywords.get("skip_rmse" ,False) + skip_iav = keywords.get("skip_iav" ,False) + skip_cycle = keywords.get("skip_cycle" ,False) + ILAMBregions = Regions() + spatial = ref.spatial + + # Convert str types to booleans + if type(skip_rmse) == type(""): + skip_rmse = (skip_rmse.lower() == "true") + if type(skip_iav ) == type(""): + skip_iav = (skip_iav .lower() == "true") + if type(skip_cycle) == type(""): + skip_cycle = (skip_cycle.lower() == "true") - # convert to plot units - dep_plot_unit = keywords.get("dep_plot_unit",dep_var.unit) - ind_plot_unit = keywords.get("ind_plot_unit",ind_var.unit) - if dep_plot_unit is not None: dep_var.convert(dep_plot_unit) - if ind_plot_unit is not None: ind_var.convert(ind_plot_unit) - - # if the variables are temporal, we need to get period means - if dep_var.temporal: dep_var = dep_var.integrateInTime(mean=True) - if ind_var.temporal: ind_var = ind_var.integrateInTime(mean=True) - mask = dep_var.data.mask + ind_var.data.mask - - # analysis over regions - for region in regions: + # Check if we need to skip parts of the analysis + if not ref.monthly : skip_cycle = True + if ref.time.size < 12: skip_cycle = True + if ref.time.size == 1: skip_rmse = True + if skip_rmse : skip_iav = True + name = ref.name + + # Interpolate both reference and comparison to a grid composed of + # their cell breaks + ref.convert(plot_unit) + com.convert(plot_unit) + lat,lon,lat_bnds,lon_bnds = _composeGrids(ref,com) + REF = ref.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + COM = com.interpolate(lat=lat,lon=lon,lat_bnds=lat_bnds,lon_bnds=lon_bnds) + unit = REF.unit + area = REF.area + ndata = REF.ndata + + # Find the mean values over the time period + ref_timeint = ref.integrateInTime(mean=True).convert(plot_unit) + com_timeint = com.integrateInTime(mean=True).convert(plot_unit) + REF_timeint = REF.integrateInTime(mean=True).convert(plot_unit) + COM_timeint = COM.integrateInTime(mean=True).convert(plot_unit) + normalizer = REF_timeint.data if mass_weighting else None + + # Report period mean values over all possible representations of + # land + ref_and_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == False) + ref_not_com = (REF_timeint.data.mask == False) * (COM_timeint.data.mask == True ) + com_not_ref = (REF_timeint.data.mask == True ) * (COM_timeint.data.mask == False) + if benchmark_dataset is not None: - lats,lons = ILAMBregions[region] - rmask = (np.outer((dep_var.lat>lats[0])*(dep_var.latlons[0])*(dep_var.lon 1 
else REF_timeint) + bias_score_map.data.mask = (ref_and_com==False) # for some reason I need to explicitly force the mask + if dataset is not None: + bias.name = "bias_map_of_%s" % name + bias.toNetCDF4(dataset,group="MeanState") + bias_score_map.name = "biasscore_map_of_%s" % name + bias_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + bias_val = bias.integrateInSpace(region=region,mean=True).convert(plot_unit) + bias_val.name = "Bias %s" % region + bias_val.toNetCDF4(dataset,group="MeanState") + bias_score = bias_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + bias_score.name = "Bias Score %s" % region + bias_score.toNetCDF4(dataset,group="MeanState") + del bias,bias_score_map + + # Spatial mean: plots + if REF.time.size > 1: + if benchmark_dataset is not None: + for region in regions: + ref_spaceint = REF.integrateInSpace(region=region,mean=True) + ref_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + ref_spaceint.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + for region in regions: + com_spaceint = COM.integrateInSpace(region=region,mean=True) + com_spaceint.name = "spaceint_of_%s_over_%s" % (name,region) + com_spaceint.toNetCDF4(dataset,group="MeanState") + + # RMSE: maps, scalars, and scores + if not skip_rmse: + rmse = REF.rmse(COM).convert(plot_unit) + del REF + cCOM = Variable(name = "centralized %s" % name, unit = unit, + data = np.ma.masked_array(COM.data-COM_timeint.data[np.newaxis,...],mask=COM.data.mask), + time = COM.time, time_bnds = COM.time_bnds, + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = COM.area, ndata = COM.ndata).convert(plot_unit) + del COM + crmse = cREF.rmse(cCOM).convert(plot_unit) + del cREF + if skip_iav: del cCOM + rmse_score_map = Score(crmse,REF_iav) + if dataset is not None: + rmse.name = "rmse_map_of_%s" % name + rmse.toNetCDF4(dataset,group="MeanState") + rmse_score_map.name = "rmsescore_map_of_%s" % name + rmse_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + rmse_val = rmse.integrateInSpace(region=region,mean=True).convert(plot_unit) + rmse_val.name = "RMSE %s" % region + rmse_val.toNetCDF4(dataset,group="MeanState") + rmse_score = rmse_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + rmse_score.name = "RMSE Score %s" % region + rmse_score.toNetCDF4(dataset,group="MeanState") + del rmse,crmse,rmse_score_map + + # IAV: maps, scalars, scores + if not skip_iav: + COM_iav = cCOM.rms() + del cCOM + iav_score_map = Score(Variable(name = "diff %s" % name, unit = unit, + data = (COM_iav.data-REF_iav.data), + lat = lat, lat_bnds = lat_bnds, lon = lon, lon_bnds = lon_bnds, + area = area, ndata = ndata), + REF_iav) + if benchmark_dataset is not None: + REF_iav.name = "iav_map_of_%s" % name + REF_iav.toNetCDF4(benchmark_dataset,group="MeanState") + if dataset is not None: + COM_iav.name = "iav_map_of_%s" % name + COM_iav.toNetCDF4(dataset,group="MeanState") + iav_score_map.name = "iavscore_map_of_%s" % name + iav_score_map.toNetCDF4(dataset,group="MeanState") + for region in regions: + iav_score = iav_score_map.integrateInSpace(region=region,mean=True,weight=normalizer) + iav_score.name = "Interannual Variability Score %s" % region + iav_score.toNetCDF4(dataset,group="MeanState") + del COM_iav,iav_score_map + del REF_iav + + return def ClipTime(v,t0,tf): """Remove time from a variable based on input bounds. 
@@ -1300,10 +1531,10 @@ def MakeComparable(ref,com,**keywords): # If the reference is spatial, the comparison must be if ref.spatial and not com.spatial: - msg = "%s Datasets are not uniformly spatial: " % logstring - msg += "reference = %s, comparison = %s" % (ref.spatial,com.spatial) - logger.debug(msg) - raise VarsNotComparable() + ref = ref.extractDatasites(com.lat,com.lon) + msg = "%s The reference dataset is spatial but the comparison is site-based. " % logstring + msg += "Extracted %s sites from the reference to match the comparison." % ref.ndata + logger.info(msg) # If the reference is layered, the comparison must be if ref.layered and not com.layered: @@ -1383,7 +1614,7 @@ def MakeComparable(ref,com,**keywords): # comparison, coarsen the comparison if np.log10(ref.dt/com.dt) > 0.5: com = com.coarsenInTime(ref.time_bnds,window=window) - + # Time bounds of the reference dataset t0 = ref.time_bnds[ 0,0] tf = ref.time_bnds[-1,1] diff --git a/ilamb/ilamb/test/scores_test.csv.gold b/ilamb/ilamb/test/scores_test.csv.gold index 6d65ab7b..6fccfb93 100644 --- a/ilamb/ilamb/test/scores_test.csv.gold +++ b/ilamb/ilamb/test/scores_test.csv.gold @@ -1,9 +1,9 @@ Variables,CLM50r243CRUNCEP,CLM50r243GSWP3 -Biomass,0.595710463937,0.678304573522 -Gross Primary Productivity,0.753476728464,0.741270301037 -Global Net Ecosystem Carbon Balance,0.705400063727,0.863669079462 -Net Ecosystem Exchange,0.524058275106,0.504338904659 -Terrestrial Water Storage Anomaly,0.484015616221,0.470205924215 -Albedo,0.771776381299,0.774604472682 -Surface Air Temperature,0.988457088529,0.990624010352 -Precipitation,0.812343937554,0.824581872315 +Biomass,0.5957104653413856,0.6783045750117078 +Gross Primary Productivity,0.6217211297637607,0.6126273585798891 +Global Net Ecosystem Carbon Balance,0.7054000637266042,0.8636690794621101 +Net Ecosystem Exchange,0.3941918077804778,0.38120476926634617 +Terrestrial Water Storage Anomaly,0.7000653021257858,0.7269702240175762 +Albedo,0.5434663466148166,0.544587485316599 +Surface Air Temperature,0.9256731031865132,0.9314748385926337 +Precipitation,0.7555153501937276,0.7679655805094326
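The broadest cross-cutting change in this patch is the switch from the cfunits package to cf_units for all unit handling (setup.py, doc/install.rst, Variable.py, ModelResult.py, ilamblib.py). Below is a minimal, standalone sketch of the cf_units calls the port relies on; it is not part of the patch, and the example units and array values are only illustrative.

    # Sketch of the cf_units API used in the patch (Unit, is_convertible, convert, format).
    import numpy as np
    from cf_units import Unit

    src = Unit("kg m-2 s-1")        # unit the data currently carries (illustrative)
    tar = Unit("g m-2 d-1")         # unit requested for tables/plots (illustrative)

    # cfunits Units.equivalent(a,b) becomes a.is_convertible(b)
    assert src.is_convertible(tar)

    # cfunits Units.conform(data,src,tar) becomes src.convert(data,tar)
    data = np.array([1.0, 2.0, 4.0])
    data = src.convert(data, tar)   # 1 kg m-2 s-1 equals 8.64e7 g m-2 d-1

    # unit algebra and string formatting, as used in the integrateIn* methods
    unit0 = Unit("d") * src         # analogous to unit0 = Unit("d")*unit above
    print(unit0.format())

The same pattern appears wherever the patch replaces Units.conform with Unit.convert; because convert operates on the raw array, the data mask is re-applied afterwards, as done in Variable.convert above.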