Skip to content

Commit

Permalink
Fix up import things. Break up some long lines.
Browse files Browse the repository at this point in the history
  • Loading branch information
ilaflott committed Jul 22, 2024
1 parent 9c923fc commit f1ab693
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 31 deletions.
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/CSVwriter.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import os.path
import csv
from csv import writer
from intakebuilder import builderconfig, configparser
from . import builderconfig, configparser

def getHeader(configyaml):
'''
Expand Down
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/getinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from csv import writer
import os
import xarray as xr
from intakebuilder import builderconfig, configparser
from . import builderconfig, configparser


'''
Expand Down
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/gfdlcrawler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from intakebuilder import getinfo, builderconfig
from . import getinfo, builderconfig
import sys
import re
import operator as op
Expand Down
23 changes: 8 additions & 15 deletions catalogbuilder/scripts/gen_intake_gfdl.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,14 @@
logger.setLevel(logging.INFO)

try:
from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
except ModuleNotFoundError:
print("The module intakebuilder is not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
print("Attempting again with adjusted sys.path ")
try:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
except:
print("Unable to adjust sys.path")
#print(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
except ModuleNotFoundError:
sys.exit("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ")
from catalogbuilder.intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser
except ModuleNotFoundError as exc:
raise Exception(f"import problems!!!") from exc

package_dir = os.path.dirname(os.path.abspath(__file__))
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')

import catalogbuilder.cats
template_path = catalogbuilder.cats.__path__[0] + '/gfdl_template.json'

#Setting up argument parsing/flags
@click.command()
Expand All @@ -35,7 +27,8 @@
#,help='The directory path with the datasets to be cataloged. E.g a GFDL PP path till /pp')
@click.argument('output_path',required=False,nargs=1)
#,help='Specify output filename suffix only. e.g. catalog')
@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,help='Path to your yaml config, Use the config_template in intakebuilder repo')
@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,
help='Path to your yaml config, Use the config_template in intakebuilder repo')
@click.option('--filter_realm', nargs=1)
@click.option('--filter_freq', nargs=1)
@click.option('--filter_chunk', nargs=1)
Expand Down
16 changes: 11 additions & 5 deletions catalogbuilder/scripts/gen_intake_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,22 @@

def main():
#######INPUT HERE OR USE FROM A CONFIG FILE LATER######
# project_dir = "/Users/ar46/data_cmip6/CMIP6/" # DRS COMPLIANT PROJECT DIR
#project_dir = "/Users/ar46/data_cmip6/CMIP6/" # DRS COMPLIANT PROJECT DIR
project_dir = "/uda/CMIP6/"#
#CMIP/NOAA-GFDL/GFDL-ESM4/"
csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv" ##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv"
##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv"
csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv"
#######################################################

######### SEARCH FILTERS ###########################
dictFilter = {}
dictFilter["source_prefix"]= 'CMIP6/' #CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/' #NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
dictFilter["source_prefix"]= 'CMIP6/'
#CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/'
#NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
#########################################################

dictInfo = {}
project_dir = project_dir.rstrip("/")
logger.info("Calling localcrawler.crawlLocal")
Expand All @@ -32,5 +37,6 @@ def main():
CSVwriter.listdict_to_csv(list_files, headers, csvfile)
print("CSV generated at:", os.path.abspath(csvfile))
logger.info("CSV generated at"+ os.path.abspath(csvfile))

if __name__ == '__main__':
main()
4 changes: 2 additions & 2 deletions catalogbuilder/scripts/gen_intake_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def main():
######### SEARCH FILTERS ###########################
dictFilter = {}
dictFilter["source_prefix"]= 'CMIP6/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
#COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable
#COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name
#######################################################
project_bucket = project_root.split("/")[1].lstrip("/")
project_name = project_root.split("/")[2]
Expand Down
21 changes: 16 additions & 5 deletions catalogbuilder/scripts/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,22 @@
@click.command()
@click.argument('json_path', nargs = 1 , required = True)
@click.argument('json_template_path', nargs = 1 , required = False)
@click.option('-tf', '--test-failure', is_flag=True, default = False, help="Errors are only printed. Program will not exit.")
@click.option('-tf', '--test-failure', is_flag=True, default = False,
help="Errors are only printed. Program will not exit.")
def main(json_path,json_template_path,test_failure):

""" This test ensures catalogs generated by the Catalog Builder tool are minimally valid. This means a few things: the generated catalog JSON file reflects the template it was generated with, the catalog CSV has atleast one row of values (not headers), and each required column exists without any empty values. If a test case is broken or expected to fail, the --test-failure/-tf flag can be used. This flag will simply print errors instead of doing a sys.exit.
""" This test ensures catalogs generated by the Catalog Builder tool are minimally valid.
This means a few things: the generated catalog JSON file reflects the template it was
generated with, the catalog CSV has atleast one row of values (not headers), and each
required column exists without any empty values. If a test case is broken or expected to
fail, the --test-failure/-tf flag can be used. This flag will simply print errors
instead of doing a sys.exit.
JSON_PATH: Path to generated schema to be tested
JSON_PATH: Path to generated schema to be tested
JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json will be used for comparison """
JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json
will be used for comparison
"""

#Open JSON
j = json.load(open(json_path))
Expand Down Expand Up @@ -51,7 +59,10 @@ def main(json_path,json_template_path,test_failure):
errors = 0
for column in req:
if column not in catalog.columns:
print(f"The required column '{column}' does not exist in the csv. In other words, there is some inconsistency between the json and the csv file. Please check out info listed under aggregation_control and groupby_attrs in your json file and verify if those columns show up in the csv as well.")
print(f"The required column '{column}' does not exist in the csv. In other words, "
"there is some inconsistency between the json and the csv file. Please check "
"out info listed under aggregation_control and groupby_attrs in your json file"
" and verify if those columns show up in the csv as well." )
errors += 1

if column in catalog.columns:
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- default
dependencies:
- conda
- python=3.7
- python
- conda-env
- conda-build
- conda-verify
Expand Down

0 comments on commit f1ab693

Please sign in to comment.