Merge pull request #24 from aradhakrishnanGFDL/16-pythoncalls
16 pythoncalls
aradhakrishnanGFDL authored Aug 1, 2024
2 parents bcb2d17 + c23e862 commit c949d6b
Showing 9 changed files with 138 additions and 47 deletions.
8 changes: 0 additions & 8 deletions .github/workflows/conda-env-create-run-pytest.yml
@@ -29,14 +29,6 @@ jobs:
        # install catalogbuilder to conda env directories
        $CONDA/envs/catalogbuilder/bin/python -m pip install --prefix $CONDA/envs/catalogbuilder .
    - name: Run pytest in catalogbuilder conda environment
      run: |
        which python
        python --version
        $CONDA/envs/catalogbuilder/bin/python --version
        # which pytest
        $CONDA/envs/catalogbuilder/bin/pytest catalogbuilder
    - name: Make sample data
      run: |
        which python
35 changes: 35 additions & 0 deletions .github/workflows/conda-pkg-extra-tests.yml
@@ -0,0 +1,35 @@
name: conda-pkg-extra-tests
on:
  pull_request:
    branches:
      # for testing conda build w no upload during PRs
      - main
jobs:
  build:
    runs-on: ubuntu-latest
    container:
      image: continuumio/miniconda3:latest
    steps:
    - name: Checkout Files
      uses: actions/checkout@v4
    - name: Run Docker to Build
      run: |
        conda config --append channels conda-forge
        conda config --append channels noaa-gfdl
        conda install conda-build conda-verify
        conda build .
    - name: Run additional utilities as tests
      run: |
        conda create --name catalogbuildertest
        conda install -n catalogbuildertest catalogbuilder --use-local
        /opt/conda/envs/catalogbuildertest/bin/pytest catalogbuilder/tests/test_create_catalog.py
        #we will save the output from following alone as manifest
    - name: upload-artifacts
      uses: actions/upload-artifact@v4
      with:
        name: workflow-artifacts1
        path: |
          sample-mdtf-catalog.csv
          sample-mdtf-catalog.json
    - name: Download all workflow run artifacts
      uses: actions/download-artifact@v4
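Note: this new workflow builds the conda package locally and then runs the packaged test module with the environment's own pytest binary. A rough local equivalent, sketched below under the assumption that catalogbuilder and pytest are already installed in the active Python environment and that you run it from the repository root, is to invoke the same test file through pytest's Python API.

```python
# Sketch only: run the same test module the CI step targets. Assumes catalogbuilder
# and pytest are installed in the current environment and the repo is checked out.
import pytest

exit_code = pytest.main(["catalogbuilder/tests/test_create_catalog.py", "-v"])
raise SystemExit(int(exit_code))
```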
1 change: 0 additions & 1 deletion README.md
@@ -4,5 +4,4 @@ Cite our work: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5196586.svg)]

See our [project documentation site ](https://noaa-gfdl.github.io/CatalogBuilder/).


This project follows the [all-contributors](https://github.com/all-contributors/all-contributors) specification. Contributions of any kind welcome!
43 changes: 41 additions & 2 deletions catalogbuilder/scripts/configs/config-example.yml
@@ -1,2 +1,41 @@
input_path: "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/" #"ENTER INPUT PATH HERE" #Example: /Users/ar46/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/"
output_path: "catalog" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path)
#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
# the output_path_template is set as follows.
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0, which does not map to an existing header value, so we simply put NA in output_path_template
#for the fourth value.

#catalog headers
#The headerlist is the expected column names in your catalog/csv file. This is usually determined by the users in conjunction
#with the ESM collection specification standards and the appropriate workflows.

headerlist: ["activity_id", "institution_id", "source_id", "experiment_id",
"frequency", "realm", "table_id",
"member_id", "grid_label", "variable_id",
"time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"]

#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
# the output_path_template is set as follows.
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0, which does not map to an existing header value, so we simply put NA in output_path_template
#for the fourth value.

output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq']

output_file_template: ['realm','time_range','variable_id']

#OUTPUT FILE INFO is currently passed as command-line argument.
#We will revisit adding a csvfile, jsonfile and logfile configuration to the builder configuration file in the future.
#csvfile = #jsonfile = #logfile =

#######################################################

input_path: "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp"
output_path: "sample-mdtf-catalog" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path)
38 changes: 20 additions & 18 deletions catalogbuilder/scripts/gen_intake_gfdl.py
@@ -29,23 +29,8 @@
package_dir = os.path.dirname(os.path.abspath(__file__))
template_path = os.path.join(package_dir, '../cats/gfdl_template.json')

#Setting up argument parsing/flags
@click.command()
#TODO arguments don't have help messages, so consider changing arguments to options?
@click.argument('input_path',required=False,nargs=1)
#,help='The directory path with the datasets to be cataloged. E.g a GFDL PP path till /pp')
@click.argument('output_path',required=False,nargs=1)
#,help='Specify output filename suffix only. e.g. catalog')
@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,help='Path to your yaml config, Use the config_template in intakebuilder repo')
@click.option('--filter_realm', nargs=1)
@click.option('--filter_freq', nargs=1)
@click.option('--filter_chunk', nargs=1)
@click.option('--overwrite', is_flag=True, default=False)
@click.option('--append', is_flag=True, default=False)
@click.option('--slow','-s', is_flag=True, default=False)
def main(input_path=None, output_path=None, config=None, filter_realm=None, filter_freq=None, filter_chunk=None,
def create_catalog(input_path=None, output_path=None, config=None, filter_realm=None, filter_freq=None, filter_chunk=None,
overwrite=False, append=False, slow = False):

    configyaml = None
    # TODO error catching
    #print("input path: ",input_path, " output path: ", output_path)
@@ -86,7 +71,6 @@ def main(input_path=None, output_path=None, config=None, filter_realm=None, filt
dictFilter["chunk_freq"] = "5yr"
dictFilterIgnore["remove"]= 'DO_NOT_USE'
'''
#########################################################
dictInfo = {}
project_dir = project_dir.rstrip("/")
logger.info("Calling gfdlcrawler.crawlLocal")
@@ -125,7 +109,25 @@ def main(input_path=None, output_path=None, config=None, filter_realm=None, filt
print("JSON generated at:", os.path.abspath(json_path))
print("CSV generated at:", os.path.abspath(csv_path))
logger.info("CSV generated at" + os.path.abspath(csv_path))
return(csv_path,json_path)

#Setting up argument parsing/flags
@click.command()
#TODO arguments don't have help messages, so consider changing arguments to options?
@click.argument('input_path',required=False,nargs=1)
#,help='The directory path with the datasets to be cataloged. E.g a GFDL PP path till /pp')
@click.argument('output_path',required=False,nargs=1)
#,help='Specify output filename suffix only. e.g. catalog')
@click.option('--config',required=False,type=click.Path(exists=True),nargs=1,help='Path to your yaml config, Use the config_template in intakebuilder repo')
@click.option('--filter_realm', nargs=1)
@click.option('--filter_freq', nargs=1)
@click.option('--filter_chunk', nargs=1)
@click.option('--overwrite', is_flag=True, default=False)
@click.option('--append', is_flag=True, default=False)
@click.option('--slow','-s', is_flag=True, default=False)

def create_catalog_cli(**kwargs):
    return create_catalog(**kwargs)

if __name__ == '__main__':
    main()
    create_catalog_cli()
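With this split, create_catalog can be imported and called directly from Python, while create_catalog_cli preserves the original click-based command line. A hedged usage sketch follows; the paths are placeholders, not values from this change.

```python
# Sketch of programmatic use after the refactor; both paths below are placeholders.
from catalogbuilder.scripts import gen_intake_gfdl

csv_path, json_path = gen_intake_gfdl.create_catalog(
    input_path="/path/to/pp",  # placeholder: root of a GFDL post-processing tree
    output_path="mycatalog",   # suffix only: produces mycatalog.csv and mycatalog.json
)
print(csv_path, json_path)
```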
12 changes: 6 additions & 6 deletions catalogbuilder/scripts/gen_intake_gfdl_runner.py
@@ -1,13 +1,13 @@
#!/usr/bin/env python

#TODO test after conda pkg is published and make changes as needed
#from catalogbuilder.scripts import gen_intake_gfdl
from . import gen_intake_gfdl
from catalogbuilder.scripts import gen_intake_gfdl
import sys

input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/"
input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp"
output_path = "test"
sys.argv = ['INPUT_PATH', input_path, output_path]
print(sys.argv)
gen_intake_gfdl.main()
try:
    gen_intake_gfdl.create_catalog(input_path,output_path)
except:
    sys.exit("Exception occurred calling gen_intake_gfdl.create_catalog")

18 changes: 10 additions & 8 deletions catalogbuilder/scripts/gen_intake_gfdl_runner_config.py
@@ -1,11 +1,13 @@
#!/usr/bin/env python

#from catalogbuilder.scripts import gen_intake_gfdl
from . import gen_intake_gfdl
import sys

# this will break at some point #TODO
sys.argv = ['input_path','--config', '/home/a1r/github/CatalogBuilder/scripts/configs/config-example.yml']
print(sys.argv)
gen_intake_gfdl.main()
from catalogbuilder.scripts import gen_intake_gfdl
import sys, os

#This is an example call to run catalog builder using a yaml config file.
package_dir = os.path.dirname(os.path.abspath(__file__))
configyaml = os.path.join(package_dir, 'configs/config-example.yml')

def create_catalog_from_config(config=configyaml):
    csv, json = gen_intake_gfdl.create_catalog(config=configyaml)
    return(csv,json)
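Because create_catalog_from_config defaults to the packaged config-example.yml, it can be called with no arguments, which is how the new test uses it. A brief sketch of that call:

```python
# Sketch: build a catalog from the packaged example config; no arguments needed
# because config defaults to configs/config-example.yml next to the script.
from catalogbuilder.scripts import gen_intake_gfdl_runner_config

csv_path, json_path = gen_intake_gfdl_runner_config.create_catalog_from_config()
print(csv_path, json_path)
```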

15 changes: 11 additions & 4 deletions catalogbuilder/tests/make_sample_data.py
@@ -12,17 +12,24 @@
"""
import os
import subdirs
from subdirs import *
from pathlib import Path

realm_mapping = [realm]
root_dir = 'archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp'
freq_mapping = [freq]
chunk_freq = '1yr'

def make_sample_data():
    # Create directory
    try:
        import subdirs
    except:
        import sys
        print((os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),"tests")))
        sys.path.append(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))),"tests"))
        import subdirs
    from subdirs import realm, freq, time,vars
    realm_mapping = [realm]
    freq_mapping = [freq]

    realm_ctr = (len(subdirs.realm))
    i = 0
    for j in range(0, realm_ctr):
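make_sample_data() now resolves the subdirs test module even when the tests directory is not on sys.path, so it can be invoked directly before a catalog build. A short sketch of that call, assuming the relative root_dir above resolves against a writable current working directory:

```python
# Sketch: generate the sample GFDL-style pp directory tree used by the catalog tests.
# Run from a writable working directory; root_dir in make_sample_data.py is relative.
from catalogbuilder.tests import make_sample_data

make_sample_data.make_sample_data()
```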
15 changes: 15 additions & 0 deletions catalogbuilder/tests/test_create_catalog.py
@@ -0,0 +1,15 @@
def test_create_catalog():
    from pathlib import Path
    import catalogbuilder
    from catalogbuilder.scripts import gen_intake_gfdl_runner_config
    from catalogbuilder.tests import make_sample_data
    make_sample_data.make_sample_data()
    csv, json = gen_intake_gfdl_runner_config.create_catalog_from_config()
    #to output success/failure in pytest run with conda pkg local install in extra-tests CI workflow#
    print(csv)
    csvpath = Path(csv)
    jsonpath = Path(json)
    assert csvpath.is_file()
    assert jsonpath.is_file()

