From f1ab693756da5c302d98ba1533d8101fa5407a4f Mon Sep 17 00:00:00 2001 From: Ian Laflotte Date: Mon, 22 Jul 2024 15:42:34 -0400 Subject: [PATCH] fixup import things. breakup some long lines. --- catalogbuilder/intakebuilder/CSVwriter.py | 2 +- catalogbuilder/intakebuilder/getinfo.py | 2 +- catalogbuilder/intakebuilder/gfdlcrawler.py | 2 +- catalogbuilder/scripts/gen_intake_gfdl.py | 23 +++++++-------------- catalogbuilder/scripts/gen_intake_local.py | 16 +++++++++----- catalogbuilder/scripts/gen_intake_s3.py | 4 ++-- catalogbuilder/scripts/test_catalog.py | 21 ++++++++++++++----- environment.yml | 2 +- 8 files changed, 41 insertions(+), 31 deletions(-) diff --git a/catalogbuilder/intakebuilder/CSVwriter.py b/catalogbuilder/intakebuilder/CSVwriter.py index 9a6a33f..7819f17 100644 --- a/catalogbuilder/intakebuilder/CSVwriter.py +++ b/catalogbuilder/intakebuilder/CSVwriter.py @@ -1,7 +1,7 @@ import os.path import csv from csv import writer -from intakebuilder import builderconfig, configparser +from . import builderconfig, configparser def getHeader(configyaml): ''' diff --git a/catalogbuilder/intakebuilder/getinfo.py b/catalogbuilder/intakebuilder/getinfo.py index d974c29..53e1185 100644 --- a/catalogbuilder/intakebuilder/getinfo.py +++ b/catalogbuilder/intakebuilder/getinfo.py @@ -4,7 +4,7 @@ from csv import writer import os import xarray as xr -from intakebuilder import builderconfig, configparser +from . import builderconfig, configparser ''' diff --git a/catalogbuilder/intakebuilder/gfdlcrawler.py b/catalogbuilder/intakebuilder/gfdlcrawler.py index dd81c04..d8f871a 100644 --- a/catalogbuilder/intakebuilder/gfdlcrawler.py +++ b/catalogbuilder/intakebuilder/gfdlcrawler.py @@ -1,5 +1,5 @@ import os -from intakebuilder import getinfo, builderconfig +from . import getinfo, builderconfig import sys import re import operator as op diff --git a/catalogbuilder/scripts/gen_intake_gfdl.py b/catalogbuilder/scripts/gen_intake_gfdl.py index a99b667..8a2f6ae 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl.py +++ b/catalogbuilder/scripts/gen_intake_gfdl.py @@ -11,22 +11,14 @@ logger.setLevel(logging.INFO) try: - from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser -except ModuleNotFoundError: - print("The module intakebuilder is not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ") - print("Attempting again with adjusted sys.path ") - try: - sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - except: - print("Unable to adjust sys.path") - #print(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - try: - from intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser - except ModuleNotFoundError: - sys.exit("The module 'intakebuilder' is still not installed. Do you have intakebuilder in your sys.path or have you activated the conda environment with the intakebuilder package in it? ") + from catalogbuilder.intakebuilder import gfdlcrawler, CSVwriter, builderconfig, configparser +except ModuleNotFoundError as exc: + raise Exception(f"import problems!!!") from exc package_dir = os.path.dirname(os.path.abspath(__file__)) -template_path = os.path.join(package_dir, '../cats/gfdl_template.json') + +import catalogbuilder.cats +template_path = catalogbuilder.cats.__path__[0] + '/gfdl_template.json' #Setting up argument parsing/flags @click.command() @@ -35,7 +27,8 @@ #,help='The directory path with the datasets to be cataloged. E.g a GFDL PP path till /pp') @click.argument('output_path',required=False,nargs=1) #,help='Specify output filename suffix only. e.g. catalog') -@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,help='Path to your yaml config, Use the config_template in intakebuilder repo') +@click.option('--config',required=False,type=click.Path(exists=True),nargs=1, + help='Path to your yaml config, Use the config_template in intakebuilder repo') @click.option('--filter_realm', nargs=1) @click.option('--filter_freq', nargs=1) @click.option('--filter_chunk', nargs=1) diff --git a/catalogbuilder/scripts/gen_intake_local.py b/catalogbuilder/scripts/gen_intake_local.py index 673cd16..ad06b57 100755 --- a/catalogbuilder/scripts/gen_intake_local.py +++ b/catalogbuilder/scripts/gen_intake_local.py @@ -10,17 +10,22 @@ def main(): #######INPUT HERE OR USE FROM A CONFIG FILE LATER###### -# project_dir = "/Users/ar46/data_cmip6/CMIP6/" # DRS COMPLIANT PROJECT DIR + #project_dir = "/Users/ar46/data_cmip6/CMIP6/" # DRS COMPLIANT PROJECT DIR project_dir = "/uda/CMIP6/"# #CMIP/NOAA-GFDL/GFDL-ESM4/" - csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv" ##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv" + ##"/Users/ar46/PycharmProjects/CatalogBuilder/intakebuilder/test/intake_local.csv" + csvfile = "/nbhome/a1r/intakebuilder_cats/intake_local.csv" ####################################################### + ######### SEARCH FILTERS ########################### dictFilter = {} - dictFilter["source_prefix"]= 'CMIP6/' #CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/' #NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level - #COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable - #COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name + dictFilter["source_prefix"]= 'CMIP6/' + #CMIP/CMCC/CMCC-CM2-SR5' #'CMIP6/CMIP/' + #NOAA-GFDL/GFDL-CM4/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level + #COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable + #COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name ######################################################### + dictInfo = {} project_dir = project_dir.rstrip("/") logger.info("Calling localcrawler.crawlLocal") @@ -32,5 +37,6 @@ def main(): CSVwriter.listdict_to_csv(list_files, headers, csvfile) print("CSV generated at:", os.path.abspath(csvfile)) logger.info("CSV generated at"+ os.path.abspath(csvfile)) + if __name__ == '__main__': main() diff --git a/catalogbuilder/scripts/gen_intake_s3.py b/catalogbuilder/scripts/gen_intake_s3.py index 69a8afb..8eccc60 100755 --- a/catalogbuilder/scripts/gen_intake_s3.py +++ b/catalogbuilder/scripts/gen_intake_s3.py @@ -15,8 +15,8 @@ def main(): ######### SEARCH FILTERS ########################### dictFilter = {} dictFilter["source_prefix"]= 'CMIP6/' #/CMIP/NOAA-GFDL/GFDL-ESM4/' #Must specify something here, at least the project level - #COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable - #COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name + #COMMENT dictFilter["miptable"] = "Amon" #Remove this if you don't want to filter by miptable + #COMMENT dictFilter["varname"] = "tas" #Remove this if you don't want to filter by variable name ####################################################### project_bucket = project_root.split("/")[1].lstrip("/") project_name = project_root.split("/")[2] diff --git a/catalogbuilder/scripts/test_catalog.py b/catalogbuilder/scripts/test_catalog.py index ed44d95..1d838da 100755 --- a/catalogbuilder/scripts/test_catalog.py +++ b/catalogbuilder/scripts/test_catalog.py @@ -9,14 +9,22 @@ @click.command() @click.argument('json_path', nargs = 1 , required = True) @click.argument('json_template_path', nargs = 1 , required = False) -@click.option('-tf', '--test-failure', is_flag=True, default = False, help="Errors are only printed. Program will not exit.") +@click.option('-tf', '--test-failure', is_flag=True, default = False, + help="Errors are only printed. Program will not exit.") def main(json_path,json_template_path,test_failure): - """ This test ensures catalogs generated by the Catalog Builder tool are minimally valid. This means a few things: the generated catalog JSON file reflects the template it was generated with, the catalog CSV has atleast one row of values (not headers), and each required column exists without any empty values. If a test case is broken or expected to fail, the --test-failure/-tf flag can be used. This flag will simply print errors instead of doing a sys.exit. + """ This test ensures catalogs generated by the Catalog Builder tool are minimally valid. + This means a few things: the generated catalog JSON file reflects the template it was + generated with, the catalog CSV has atleast one row of values (not headers), and each + required column exists without any empty values. If a test case is broken or expected to + fail, the --test-failure/-tf flag can be used. This flag will simply print errors + instead of doing a sys.exit. - JSON_PATH: Path to generated schema to be tested + JSON_PATH: Path to generated schema to be tested - JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json will be used for comparison """ + JSON_TEMPLATE_PATH: Path of schema template. Without a given path, cats/gfdl_template.json + will be used for comparison + """ #Open JSON j = json.load(open(json_path)) @@ -51,7 +59,10 @@ def main(json_path,json_template_path,test_failure): errors = 0 for column in req: if column not in catalog.columns: - print(f"The required column '{column}' does not exist in the csv. In other words, there is some inconsistency between the json and the csv file. Please check out info listed under aggregation_control and groupby_attrs in your json file and verify if those columns show up in the csv as well.") + print(f"The required column '{column}' does not exist in the csv. In other words, " + "there is some inconsistency between the json and the csv file. Please check " + "out info listed under aggregation_control and groupby_attrs in your json file" + " and verify if those columns show up in the csv as well." ) errors += 1 if column in catalog.columns: diff --git a/environment.yml b/environment.yml index 9c0683c..47bf473 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: - default dependencies: - conda - - python=3.7 + - python - conda-env - conda-build - conda-verify