Skip to content

Commit

Permalink
Merge pull request #63 from aradhakrishnanGFDL/59-cli-bugfix
Browse files Browse the repository at this point in the history
59 cli bugfix
  • Loading branch information
ceblanton authored Sep 26, 2024
2 parents 876ceff + b57db73 commit f186bf4
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 9 deletions.
9 changes: 6 additions & 3 deletions catalogbuilder/intakebuilder/configparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ def __init__(self, config):
configfile = yaml.safe_load(file)
try:
self.input_path = configfile['input_path']
#print("input_path :",self.input_path)
except:
raise KeyError("input_path does not exist in config")
self.input_path = None
print("input_path does not exist in config")
pass
try:
self.output_path = configfile['output_path']
#print("output_path :",self.output_path)
except:
raise KeyError("output_path does not exist in config")
self.output_path = None
print("output_path does not exist in config")
pass
try:
self.headerlist = configfile['headerlist']
print("headerlist :",self.headerlist)
Expand Down
33 changes: 33 additions & 0 deletions catalogbuilder/scripts/configs/config-example2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
# the output_path_template is set as follows.
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0, which does not map to an existing header value. So we simply use NA in output_path_template
#for the fourth value.

#catalog headers
#The headerlist is the list of expected column names in your catalog/csv file. This is usually determined by the users in conjunction
#with the ESM collection specification standards and the appropriate workflows.

headerlist: ["activity_id", "institution_id", "source_id", "experiment_id",
"frequency", "realm", "table_id",
"member_id", "grid_label", "variable_id",
"time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"]

#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
# the output_path_template is set as follows.
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0, which does not map to an existing header value. So we simply use NA in output_path_template
#for the fourth value.

output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq']

output_file_template: ['realm','time_range','variable_id']

2 changes: 0 additions & 2 deletions catalogbuilder/scripts/gen_intake_gfdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm=
# TODO error catching
if (config is not None):
configyaml = configparser.Config(config)
if configyaml.input_path is None or not configyaml.input_path :
sys.exit("Can't find paths, is yaml configured?")
if(input_path is None):
input_path = configyaml.input_path
if(output_path is None):
Expand Down
13 changes: 9 additions & 4 deletions catalogbuilder/scripts/gen_intake_gfdl_runner_config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
#!/usr/bin/env python

import catalogbuilder
from catalogbuilder.scripts import gen_intake_gfdl
import sys, os

#This is an example call to run catalog builder using a yaml config file.
# Directory that contains this script; the example config is located relative to it.
package_dir = os.path.dirname(os.path.abspath(__file__))
# Example yaml config shipped in the configs/ directory next to this script.
configyaml = os.path.join(package_dir, 'configs/config-example.yml')
# Example input path. NOTE(review): no leading slash, so this is a relative
# path -- presumably resolved against the current working directory; confirm.
input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp"
# Example output path passed to create_catalog as output_path.
output_path = "sample-mdtf-catalog"

def create_catalog_from_config(config=configyaml):
csv, json = gen_intake_gfdl.create_catalog(config=configyaml)
def create_catalog_from_config(input_path=input_path, output_path=output_path, configyaml=configyaml):
    """Run the catalog builder using a yaml config file.

    Every argument defaults to the module-level example value of the same
    name (bound when this function is defined). The arguments are forwarded
    to ``gen_intake_gfdl.create_catalog``; ``configyaml`` is passed as its
    ``config`` keyword.

    Returns the two values produced by ``create_catalog`` as a
    ``(csv, json)`` tuple.
    """
    outputs = gen_intake_gfdl.create_catalog(
        input_path=input_path,
        output_path=output_path,
        config=configyaml,
    )
    # create_catalog is expected to yield exactly two values.
    catalog_csv, catalog_json = outputs
    return catalog_csv, catalog_json

if __name__ == '__main__':
    # Script entry point: build the catalog from the module-level example
    # input path, output path and yaml config.
    create_catalog_from_config(
        input_path=input_path,
        output_path=output_path,
        configyaml=configyaml,
    )


18 changes: 18 additions & 0 deletions gen_intake_gfdl_runner_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import catalogbuilder
from catalogbuilder.scripts import gen_intake_gfdl
import sys, os

#This is an example call to run catalog builder using a yaml config file.

def create_catalog_from_config(input_path, output_path, configyaml):
    """Run the catalog builder for the given paths and yaml config file.

    The arguments are forwarded to ``gen_intake_gfdl.create_catalog``;
    ``configyaml`` is passed as its ``config`` keyword.

    Returns the two values produced by ``create_catalog`` as a
    ``(csv, json)`` tuple.
    """
    outputs = gen_intake_gfdl.create_catalog(
        input_path=input_path,
        output_path=output_path,
        config=configyaml,
    )
    # create_catalog is expected to yield exactly two values.
    catalog_csv, catalog_json = outputs
    return catalog_csv, catalog_json

if __name__ == '__main__':
    # Example input and output paths handed to the catalog builder.
    input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp"
    output_path = "sample-test"

    # The example config is located relative to the directory holding this script.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example2.yml')

    create_catalog_from_config(
        input_path=input_path,
        output_path=output_path,
        configyaml=configyaml,
    )


0 comments on commit f186bf4

Please sign in to comment.