Skip to content

Commit

Permalink
Merge pull request #25 from aradhakrishnanGFDL/mdtf-support
Browse files Browse the repository at this point in the history
Mdtf support
  • Loading branch information
aradhakrishnanGFDL authored Aug 1, 2024
2 parents 7598edf + 54f76e8 commit bcb2d17
Show file tree
Hide file tree
Showing 11 changed files with 73 additions and 37 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/conda-env-create-run-pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,8 @@ jobs:
with:
name: workflow-artifacts1
path: |
gfdl_autotest.csv
gfdl_autotest.json
cats/gfdl_autotest_from_yaml.csv
cats/gfdl_autotest_from_yaml.json
catalogbuilder/cats/gfdl_autotest_from_yaml.json
catalogbuilder/cats/gfdl_autotest_from_yaml.csv
- name: Download all workflow run artifacts
uses: actions/download-artifact@v4

Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
recursive-include catalogbuilder/cats *

recursive-include catalogbuilder/intakebuilder/dat *
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/builderconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
headerlist = ["activity_id", "institution_id", "source_id", "experiment_id",
"frequency", "realm", "table_id",
"member_id", "grid_label", "variable_id",
"temporal_subset", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"]
"time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"]

#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
Expand Down
10 changes: 10 additions & 0 deletions catalogbuilder/intakebuilder/dat/gfdlcmipfreq.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Mapping from GFDL post-processing frequency labels (directory names such
# as "monthly", "daily") to the CMIP6 frequency controlled vocabulary
# (mon, day, 1hr, 3hr, yr, ...). Consumed by getinfo.getFreqFromYAML when
# the catalog builder normalizes the "frequency" column.
monthly:
  frequency: mon
daily:
  frequency: day
hourly:
  frequency: 1hr
annual:
  # CMIP6 CV spells annual frequency "yr"; "yearly" is not a valid value.
  frequency: yr
3hr:
  frequency: 3hr
67 changes: 44 additions & 23 deletions catalogbuilder/intakebuilder/getinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,19 @@ def getinfoFromYAML(dictInfo,yamlfile,miptable=None):
dictInfo["realm"] = "NA"
return(dictInfo)

def getFreqFromYAML(yamlfile,gfdlfreq=None):
    """Return the CMIP frequency string for a GFDL pp frequency label.

    Looks up *gfdlfreq* (e.g. "monthly") in the mapping file *yamlfile*
    (dat/gfdlcmipfreq.yaml) and returns the corresponding CMIP frequency
    (e.g. "mon"). Returns None when gfdlfreq is falsy or has no mapping.
    """
    # Local import keeps this module importable even where PyYAML is absent
    # until this helper is actually called (matches the original's style).
    import yaml
    cmipfreq = None
    with open(yamlfile) as f:
        # safe_load is sufficient for this plain scalar mapping and avoids
        # instantiating arbitrary Python objects from the YAML stream.
        mappings = yaml.safe_load(f)
    if gfdlfreq:
        try:
            cmipfreq = mappings[gfdlfreq]["frequency"]
        except KeyError:
            # Unknown frequency label: leave the caller's value untouched.
            cmipfreq = None
    return cmipfreq

def getStem(dirpath,projectdir):
'''
return stem from the project directory passed and the files crawled within
Expand Down Expand Up @@ -81,29 +94,35 @@ def getInfoFromFilename(filename,dictInfo,logger):
return dictInfo

#adding this back to trace back some old errors
def getInfoFromGFDLFilename(filename,dictInfo,logger):
def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml):
# 5 AR: get the following from the netCDF filename e.g. atmos.200501-200912.t_ref.nc
if(filename.endswith(".nc")): #and not filename.startswith(".")):
ncfilename = filename.split(".")
varname = ncfilename[-2]
dictInfo["variable_id"] = varname
#miptable = "" #ncfilename[1]
#dictInfo["mip_table"] = miptable
#modelname = ncfilename[2]
#dictInfo["model"] = modelname
#expname = ncfilename[3]
#dictInfo["experiment_id"] = expname
#ens = ncfilename[4]
#dictInfo["ensemble_member"] = ens
#grid = ncfilename[5]
#dictInfo["grid_label"] = grid
try:
tsubset = ncfilename[1]
except IndexError:
tsubset = "null" #For fx fields
dictInfo["temporal_subset"] = tsubset
if ( (filename.endswith(".nc"))): # & ("static" not in filename)) ):
stemdir = filename.split(".")
#lets go backwards and match given input directory to the template, add things to dictInfo
j = -2
cnt = 1 #'variable_id': 'static', 'time_range': 'land'}
if configyaml:
output_file_template = configyaml.output_file_template
else:
logger.debug("Filename not compatible with this version of the builder:"+filename)
try:
output_file_template = builderconfig.output_file_template
except:
sys.exit("No output_path_template found. Check configuration.")
#output_file_template.reverse()
nlen = len(output_file_template)
for i in range(nlen-1,-1,-1): #nlen = 3
try:
if(output_file_template[i] != "NA"):
try:
#print(output_file_template[i], "=" , stemdir[(j)])
dictInfo[output_file_template[i]] = stemdir[(j)]
except IndexError:
#print("Check configuration. Is output file template set correctly?")
dictInfo[output_file_template[i]] = ""
except IndexError:
sys.exit("oops in getInfoFromGFDLFilename"+str(i)+str(j)+output_file_template[i]+stemdir[j])
j = j - 1
cnt = cnt + 1
return dictInfo

def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml):
Expand Down Expand Up @@ -239,12 +258,14 @@ def getStandardName(list_variable_id):
#search for variable and its cf name
for variable_id in list_variable_id:
cfname = (df[df['GFDL_varname'] == variable_id]["standard_name"])
#print(cfname,variable_id)
list_cfname = cfname.tolist()
if not list_cfname:
if(len(list_cfname) == 0):
#print("what if the names correspond to CMOR_varname")
cfname = (df[df['CMOR_varname'] == variable_id]["standard_name"])
list_cfname = cfname.tolist()
#print(list_cfname)
if len(list_cfname) > 0:
unique_cf = list(set(list_cfname))[0]
dictCF[variable_id] = unique_cf
dictCF[variable_id] = unique_cf
return (dictCF)
13 changes: 11 additions & 2 deletions catalogbuilder/intakebuilder/gfdlcrawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow):
if (op.countOf(filename,".") == 1):
dictInfo = getinfo.getInfoFromFilename(filename,dictInfo, logger)
else:
dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger)
dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger,configyaml)
dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml)
list_bad_modellabel = ["","piControl","land-hist","piClim-SO2","abrupt-4xCO2","hist-piAer","hist-piNTCF","piClim-ghg","piClim-OC","hist-GHG","piClim-BC","1pctCO2"]
list_bad_chunklabel = ['DO_NOT_USE']
Expand Down Expand Up @@ -106,6 +106,15 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow):
if "standard_name" in missingcols:
dictInfo["standard_name"] = "na"
getinfo.getInfoFromVarAtts(dictInfo["path"],dictInfo["variable_id"],dictInfo)

#replace frequency as needed
if 'frequency' in dictInfo.keys():
package_dir = os.path.dirname(os.path.abspath(__file__))
yamlfile = os.path.join(package_dir, 'dat/gfdlcmipfreq.yaml')
cmipfreq = None
gfdlfreq = dictInfo['frequency']
cmipfreq = getinfo.getFreqFromYAML(yamlfile,gfdlfreq=dictInfo['frequency'])
if(cmipfreq is not None):
dictInfo['frequency'] = cmipfreq
#print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq)
listfiles.append(dictInfo)
return listfiles
1 change: 0 additions & 1 deletion catalogbuilder/scripts/gen_intake_gfdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ def main(input_path=None, output_path=None, config=None, filter_realm=None, filt
#if(df['variable_id'].eq(k)).any():
df['standard_name'].loc[(df['variable_id'] == k)] = v
#df['standard_name'] = v

if(slow == False) & ('standard_name' in headers):
if ((df is not None) & (len(df) != 0) ):
with open(csv_path, 'w') as csvfile:
Expand Down
2 changes: 1 addition & 1 deletion catalogbuilder/tests/config-cfname.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ output_file_template: ['realm','time_range','variable_id']
#######################################################

input_path: "/archive/am5/am5/am5f7b10r0/c96L65_am5f7b10r0_amip/gfdl.ncrc5-deploy-prod-openmp/pp/"
output_path: "/home/a1r/github/noaa-gfdl/catalogs/c96L65_am5f7b10r0_amip" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path)
output_path: "/home/a1r/github/noaa-gfdl/catalogs/c96L65_am5f7b10r0_amip30" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path)
2 changes: 1 addition & 1 deletion catalogbuilder/tests/subdirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@
'uas'
]
time = [
'000101-000112'
'000101-000112',
'000201-000212'
]
2 changes: 1 addition & 1 deletion catalogbuilder/tests/test_ci_dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_loadcat():
#todo check if its readable etc
#we are using the dynamically generated csv and json for testing in this routine
#leveraging GitHub actions CI workflow and manifests and caches
catspec = pathlib.Path(os.path.dirname(__file__)).parent / '../workflow-artifacts1/gfdl_autotest.json'
catspec = pathlib.Path(os.path.dirname(__file__)).parent / '../workflow-artifacts1/gfdl_autotest_from_yaml.json'
cat = load_cat((str(catspec)))
try:
assert isinstance(cat.df, pd.DataFrame),"test failed"
Expand Down
2 changes: 1 addition & 1 deletion catalogbuilder/tests/test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
headerlist: ["activity_id", "institution_id", "source_id", "experiment_id",
"frequency", "realm", "table_id",
"member_id", "grid_label", "variable_id",
"time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","path"]
"time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","standard_name","path"]

#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
Expand Down

0 comments on commit bcb2d17

Please sign in to comment.