Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

59 cli bugfix #63

Merged
merged 13 commits into from
Sep 26, 2024
9 changes: 6 additions & 3 deletions catalogbuilder/intakebuilder/configparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,17 @@ def __init__(self, config):
configfile = yaml.safe_load(file)
try:
self.input_path = configfile['input_path']
#print("input_path :",self.input_path)
except:
raise KeyError("input_path does not exist in config")
self.input_path = None
print("input_path does not exist in config")
pass
try:
self.output_path = configfile['output_path']
#print("output_path :",self.output_path)
except:
raise KeyError("output_path does not exist in config")
self.output_path = None
print("output_path does not exist in config")
pass
try:
self.headerlist = configfile['headerlist']
print("headerlist :",self.headerlist)
Expand Down
33 changes: 33 additions & 0 deletions catalogbuilder/scripts/configs/config-example2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
# the output_path_template is set as follows.
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0, which does not map to an existing header value, so we simply set NA in output_path_template
#for the fourth value.

#catalog headers
#The headerlist is the list of expected column names in your catalog/csv file. This is usually determined by the users in conjunction
#with the ESM collection specification standards and the appropriate workflows.

headerlist: ["activity_id", "institution_id", "source_id", "experiment_id",
"frequency", "realm", "table_id",
"member_id", "grid_label", "variable_id",
"time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"]

#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
# the output_path_template is set as follows.
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0, which does not map to an existing header value, so we simply set NA in output_path_template
#for the fourth value.

output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq']

output_file_template: ['realm','time_range','variable_id']

2 changes: 0 additions & 2 deletions catalogbuilder/scripts/gen_intake_gfdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm=
# TODO error catching
if (config is not None):
configyaml = configparser.Config(config)
if configyaml.input_path is None or not configyaml.input_path :
sys.exit("Can't find paths, is yaml configured?")
if(input_path is None):
input_path = configyaml.input_path
if(output_path is None):
Expand Down
13 changes: 9 additions & 4 deletions catalogbuilder/scripts/gen_intake_gfdl_runner_config.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
#!/usr/bin/env python

import catalogbuilder
from catalogbuilder.scripts import gen_intake_gfdl
import sys, os

# Example settings for running the catalog builder from a YAML config file.

# Values passed to create_catalog as input_path / output_path.
# NOTE(review): input_path has no leading slash, so it is relative to the
# current working directory -- confirm this is intended.
input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp"
output_path = "sample-mdtf-catalog"

# The example config is located relative to the directory holding this script.
package_dir = os.path.dirname(os.path.abspath(__file__))
configyaml = os.path.join(package_dir, 'configs/config-example.yml')

def create_catalog_from_config(config=configyaml):
csv, json = gen_intake_gfdl.create_catalog(config=configyaml)
def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml):
    """Run gen_intake_gfdl.create_catalog with the given paths and YAML config.

    The parameter defaults are the module-level example values, bound at the
    time this function is defined.

    Returns the pair of values produced by create_catalog, as a 2-tuple.
    """
    catalog_csv, catalog_json = gen_intake_gfdl.create_catalog(
        input_path=input_path,
        output_path=output_path,
        config=configyaml,
    )
    return catalog_csv, catalog_json

if __name__ == '__main__':
    # When executed as a script, build the catalog from the module-level
    # example settings.
    create_catalog_from_config(
        input_path=input_path,
        output_path=output_path,
        configyaml=configyaml,
    )


18 changes: 18 additions & 0 deletions gen_intake_gfdl_runner_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import catalogbuilder
from catalogbuilder.scripts import gen_intake_gfdl
import sys, os

#This is an example call to run catalog builder using a yaml config file.

def create_catalog_from_config(input_path,output_path,configyaml):
    """Run gen_intake_gfdl.create_catalog with the given paths and YAML config.

    All three arguments are forwarded unchanged; configyaml is passed to
    create_catalog as its ``config`` keyword.

    Returns the pair of values produced by create_catalog, as a 2-tuple.
    """
    catalog_csv, catalog_json = gen_intake_gfdl.create_catalog(
        input_path=input_path,
        output_path=output_path,
        config=configyaml,
    )
    return catalog_csv, catalog_json

if __name__ == '__main__':
    # Example values passed on to create_catalog as input_path / output_path.
    input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp"
    output_path = "sample-test"

    # The example config is located relative to the directory holding this script.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example2.yml')

    create_catalog_from_config(
        input_path=input_path,
        output_path=output_path,
        configyaml=configyaml,
    )