From 004fbf7a6db78003009bbba2350baf950931b0fb Mon Sep 17 00:00:00 2001 From: Ciheim Brown Date: Tue, 3 Dec 2024 11:16:48 -0500 Subject: [PATCH 1/2] Some minor doc updates --- doc/background.rst | 2 +- doc/generation.rst | 7 ++++--- doc/index.rst | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/background.rst b/doc/background.rst index 1f2f458..14593ca 100644 --- a/doc/background.rst +++ b/doc/background.rst @@ -3,7 +3,7 @@ Background The catalog builder project is a “python community package ecosystem” that allows you to generate data catalogs compatible with intake-esm. Available as a Conda package. -See our `Github repository here `_. +See our `Github repository here `_. We have contributing guidelines and code of conduct documented in our GitHub repo. We welcome your contributions. Brief overview on data catalogs diff --git a/doc/generation.rst b/doc/generation.rst index 4ef8bd2..a90698f 100644 --- a/doc/generation.rst +++ b/doc/generation.rst @@ -98,7 +98,7 @@ Catalog headers (column names) are set with the *HEADER LIST* variable. The *OUT "member_id", "grid_label", "variable_id", "time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"] -The headerlist is expected column names in your catalog/csv file. This is usually determined by the users in conjuction +The headerlist contains the expected column names of your catalog/csv file. This is usually determined by the users in conjunction with the ESM collection specification standards and the appropriate workflows. .. code-block:: yaml @@ -183,5 +183,6 @@ _____ .. Reference `Flags`_. 
-- overwrite - Overwrite an existing catalog at the given output path -- append - Append (without headerlist) to an existing catalog at the given output path - --overwrite - Overwrite an existing catalog at the given output path - --append - Append (without headerlist) to an existing catalog at the given output path - --slow - Activates slow mode which retrieves standard_name (or long_name) where possible. **"Standard_name" must be in your output_path_template** diff --git a/doc/index.rst b/doc/index.rst index 0e6225e..fafceb4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -11,7 +11,7 @@ The Catalog Builder API will collect building blocks necessary to build a data c Tested on posix file system, S3 and GFDL post-processed (select simulations, components) at this time. This repository has unit tests (pytest) and incorporated the same in GitHub Actions, when a PR is open or a push is initiated. -See our `Github repository `_ here. +See our `Github repository `_ here. .. toctree:: :maxdepth: 2 From e523119e79b2bc9127bdf5c53ef58adf11a777e9 Mon Sep 17 00:00:00 2001 From: Ciheim Brown Date: Tue, 10 Dec 2024 10:09:16 -0500 Subject: [PATCH 2/2] Doc changes --- .../scripts/gen_intake_gfdl_notebook.ipynb | 4829 ----------------- doc/generation.rst | 96 +- 2 files changed, 81 insertions(+), 4844 deletions(-) delete mode 100644 catalogbuilder/scripts/gen_intake_gfdl_notebook.ipynb diff --git a/catalogbuilder/scripts/gen_intake_gfdl_notebook.ipynb b/catalogbuilder/scripts/gen_intake_gfdl_notebook.ipynb deleted file mode 100644 index ef4115d..0000000 --- a/catalogbuilder/scripts/gen_intake_gfdl_notebook.ipynb +++ /dev/null @@ -1,4829 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "f39f9409-ee87-4431-9953-55607daba427", - "metadata": {}, - "source": [ - "This notebook was tested from a GFDL workstation.\n", - "This notebook is an example of using catalog builder from a notebook to generate data catalogs, a.k.a intake-esm catalogs.\n", - "\n", - "How to get 
here? \n", - "\n", - "Login to your workstation at GFDL.\n", - "module load python/3.9\n", - "conda activate intakebuilder \n", - "(For the above: Note that you can either install your own environment using the following or use an existing environment such as this: conda activate /nbhome/Aparna.Radhakrishnan/conda/envs/intakebuilder )\n", - "\n", - "conda create -n intakebuilder \n", - "conda install intakebuilder -c noaa-gfdl -n intakebuilder\n", - "\n", - "Now, we do a couple of things to make sure your environment is available to jupyter-lab as a kernel.\n", - "\n", - "pip install ipykernel \n", - "python -m ipykernel install --user --name=intakebuilder\n", - "\n", - "Now, start a jupyter-lab session from GFDL workstation: \n", - "\n", - "jupyter-lab \n", - "\n", - "This will give you the URL to the jupyter-lab session running on your localhost. Paste the URL in your web-browser (or via TigerVNC). Paste the notebook cells from this notebook, or locate the notebook from the path where you have downloaded or cloned it via git. Go to Kernel->Change Kernel-> Choose intakebuilder.\n", - "\n", - "Run the notebook and see the results! Extend it and share it with us via a github issue. \n" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "fb3010b8-170f-4462-ad2a-457d1d5415f7", - "metadata": {}, - "outputs": [ - { - "name": "stdin", - "output_type": "stream", - "text": [ - "Found existing file! Overwrite? 
(y/n) y\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "writing..\n", - "JSON generated at: /home/a1r/mycatalog.json\n", - "CSV generated at: /home/a1r/mycatalog.csv\n" - ] - } - ], - "source": [ - "from catalogbuilder.scripts import gen_intake_gfdl\n", - "import sys,os\n", - "\n", - "######USER input begins########\n", - "\n", - "#User provides the input directory for which a data catalog needs to be generated.\n", - "#Note that depending on the date and version of the tool, only time-series data are catalogued.\n", - "\n", - "input_path = \"/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/\"\n", - "\n", - "#USER inputs the output path. Based on the following setting, user can expect to see /home/a1r/mycatalog.csv and /home/a1r/mycatalog.json generated as output.\n", - "\n", - "output_path = \"/home/a1r/mycatalog\"\n", - "\n", - "####END OF user input ##########\n", - "sys.argv = ['--INPUT_PATH', input_path, output_path]\n", - "\n", - "try:\n", - " gen_intake_gfdl.main()\n", - "except SystemExit as e:\n", - " if e.code != 0:\n", - " raise" - ] - }, - { - "cell_type": "markdown", - "id": "626eaa1f-d801-4a7d-8fad-2851c9e81070", - "metadata": {}, - "source": [ - "Let's begin our analysis" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "181913cc-4776-4b16-95d6-c6ea1b2cbdad", - "metadata": {}, - "outputs": [], - "source": [ - "import intake_esm, intake\n", - "import matplotlib #do a pip install of tools needed in your env or from the notebook\n", - "from matplotlib import pyplot as plt\n", - "%matplotlib inline\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6665a48b-a335-4fc2-8130-1a4902a428b0", - "metadata": {}, - "outputs": [], - "source": [ - "pip install matplotlib" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "0f83dbc3-3dda-4a43-82e9-fb8726b2cda8", - 
"metadata": {}, - "outputs": [], - "source": [ - "col_url = \"/home/a1r/mycatalog.json\"\n", - "col = intake.open_esm_datastore(col_url)" - ] - }, - { - "cell_type": "markdown", - "id": "344ada01-6716-4fbd-9cee-878ff815d7dd", - "metadata": {}, - "source": [ - "Explore the catalog" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "1ce0716e-6667-4aeb-8c4b-50a05643b87f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
activity_idinstitution_idsource_idexperiment_idfrequencymodeling_realmtable_idmember_idgrid_labelvariable_idtemporal_subsetchunk_freqgrid_label.1platformdimensionscell_methodspath
0devNaNam5c96L65_am5f3b1r0_pdclim1850F3hratmos_cmipNaNNaNNaNpr0002010100-00021231231yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
1devNaNam5c96L65_am5f3b1r0_pdclim1850F3hratmos_cmipNaNNaNNaNrlut0002010100-00021231231yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
2devNaNam5c96L65_am5f3b1r0_pdclim1850F3hratmos_cmipNaNNaNNaNpr0003010100-00031231231yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
3devNaNam5c96L65_am5f3b1r0_pdclim1850F3hratmos_cmipNaNNaNNaNrlut0003010100-00031231231yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
4devNaNam5c96L65_am5f3b1r0_pdclim1850F3hratmos_cmipNaNNaNNaNpr0004010100-00041231231yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
......................................................
6405devNaNam5c96L65_am5f3b1r0_pdclim1850Fmonthlyland_cmipNaNNaNNaNtreeFracNdlDcd001001-0010121yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
6406devNaNam5c96L65_am5f3b1r0_pdclim1850Fmonthlyland_cmipNaNNaNNaNtreeFracNdlEvg001001-0010121yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
6407devNaNam5c96L65_am5f3b1r0_pdclim1850Fmonthlyland_cmipNaNNaNNaNtsl001001-0010121yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
6408devNaNam5c96L65_am5f3b1r0_pdclim1850Fmonthlyland_cmipNaNNaNNaNvegFrac001001-0010121yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
6409devNaNam5c96L65_am5f3b1r0_pdclim1850Fmonthlyland_cmipNaNNaNNaNvegHeight001001-0010121yrNaNgfdl.ncrc5-deploy-prod-openmpNaNts/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd...
\n", - "

6410 rows × 17 columns

\n", - "
" - ], - "text/plain": [ - " activity_id institution_id source_id experiment_id \\\n", - "0 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "1 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "2 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "3 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "4 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "... ... ... ... ... \n", - "6405 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "6406 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "6407 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "6408 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "6409 dev NaN am5 c96L65_am5f3b1r0_pdclim1850F \n", - "\n", - " frequency modeling_realm table_id member_id grid_label \\\n", - "0 3hr atmos_cmip NaN NaN NaN \n", - "1 3hr atmos_cmip NaN NaN NaN \n", - "2 3hr atmos_cmip NaN NaN NaN \n", - "3 3hr atmos_cmip NaN NaN NaN \n", - "4 3hr atmos_cmip NaN NaN NaN \n", - "... ... ... ... ... ... \n", - "6405 monthly land_cmip NaN NaN NaN \n", - "6406 monthly land_cmip NaN NaN NaN \n", - "6407 monthly land_cmip NaN NaN NaN \n", - "6408 monthly land_cmip NaN NaN NaN \n", - "6409 monthly land_cmip NaN NaN NaN \n", - "\n", - " variable_id temporal_subset chunk_freq grid_label.1 \\\n", - "0 pr 0002010100-0002123123 1yr NaN \n", - "1 rlut 0002010100-0002123123 1yr NaN \n", - "2 pr 0003010100-0003123123 1yr NaN \n", - "3 rlut 0003010100-0003123123 1yr NaN \n", - "4 pr 0004010100-0004123123 1yr NaN \n", - "... ... ... ... ... \n", - "6405 treeFracNdlDcd 001001-001012 1yr NaN \n", - "6406 treeFracNdlEvg 001001-001012 1yr NaN \n", - "6407 tsl 001001-001012 1yr NaN \n", - "6408 vegFrac 001001-001012 1yr NaN \n", - "6409 vegHeight 001001-001012 1yr NaN \n", - "\n", - " platform dimensions cell_methods \\\n", - "0 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "1 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "2 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "3 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "4 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "... ... ... ... 
\n", - "6405 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "6406 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "6407 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "6408 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "6409 gfdl.ncrc5-deploy-prod-openmp NaN ts \n", - "\n", - " path \n", - "0 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "1 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "2 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "3 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "4 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "... ... \n", - "6405 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "6406 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "6407 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "6408 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "6409 /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pd... \n", - "\n", - "[6410 rows x 17 columns]" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "col.df" - ] - }, - { - "cell_type": "markdown", - "id": "613f8259-a92f-4be5-8268-dfbe225f0670", - "metadata": {}, - "source": [ - "Let's narrow down the search" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "62acbaec-573c-47f9-83bc-015790fd7983", - "metadata": {}, - "outputs": [], - "source": [ - "expname_filter = ['c96L65_am5f3b1r0_pdclim1850F']\n", - "modeling_realm = \"land_cmip\"\n", - "frequency = \"daily\"" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "7fa86782-3f7b-4dbf-80af-0f035003d57f", - "metadata": {}, - "outputs": [], - "source": [ - "cat = col.search(experiment_id=expname_filter,frequency=frequency,modeling_realm=modeling_realm)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "6fe2cf2f-e74a-4b50-a099-47c28541878d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'hflsLut', 'mrso', 'mrsos'}" - ] - }, - "execution_count": 29, 
- "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "set(cat.df[\"variable_id\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "id": "aa216969-e335-4448-977c-d623a62a697e", - "metadata": {}, - "outputs": [], - "source": [ - "cat = cat.search(variable_id=\"mrso\") #Total Soil Moisture Content" - ] - }, - { - "cell_type": "markdown", - "id": "8542c4e8-07eb-48ba-b466-8e07d3405415", - "metadata": {}, - "source": [ - "dmget the files" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "5227091c-5d83-4b73-a340-22e92124e1f7", - "metadata": {}, - "outputs": [], - "source": [ - "#for simple dmget usage, just use this !dmget {file}\n", - "#use following to wrap the dmget call for each path in the catalog\n", - "def dmgetmagic(x):\n", - " cmd = 'dmget %s'% str(x) \n", - " return os.system(cmd)\n", - "\n", - "#OR refer to importing dmget , https://github.com/aradhakrishnanGFDL/canopy-cats/tree/main/notebooks/dmget.py" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "5eb6b01e-4d68-48ee-904f-dd285be7dee5", - "metadata": {}, - "outputs": [], - "source": [ - "dmstatus = cat.df[\"path\"].apply(dmgetmagic)" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "8b50305d-aac1-4df5-add1-fbc9af7773ab", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "--> The keys in the returned dictionary of datasets are constructed as follows:\n", - "\t'source_id.experiment_id.frequency.modeling_realm.variable_id.chunk_freq'\n", - " |████████████████████████████████████████| 100.00% [1/1 00:00<00:00]\r" - ] - } - ], - "source": [ - "dset_dict = cat.to_dataset_dict(cdf_kwargs={'chunks': {'time':5}, 'decode_times': True})" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "f1c27413-e9a7-4855-b9be-1c0b9cf7f4ac", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - 
"am5.c96L65_am5f3b1r0_pdclim1850F.daily.land_cmip.mrso.1yr\n" - ] - } - ], - "source": [ - "for k in dset_dict.keys(): \n", - " print(k)" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "id": "9aae260f-87c8-4d2a-9b55-b9587c1f2309", - "metadata": {}, - "outputs": [], - "source": [ - "ds = dset_dict[\"am5.c96L65_am5f3b1r0_pdclim1850F.daily.land_cmip.mrso.1yr\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "c650221c-714e-4f2e-a53f-ca937c6c38ae", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 757MB\n",
-       "Dimensions:     (time: 3650, bnds: 2, lat: 180, lon: 288)\n",
-       "Coordinates:\n",
-       "    average_DT  (time) timedelta64[ns] 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T1  (time) object 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T2  (time) object 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "  * bnds        (bnds) float64 16B 1.0 2.0\n",
-       "  * lat         (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n",
-       "    lat_bnds    (lat, bnds) float64 3kB dask.array<chunksize=(180, 2), meta=np.ndarray>\n",
-       "  * lon         (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n",
-       "    lon_bnds    (lon, bnds) float64 5kB dask.array<chunksize=(288, 2), meta=np.ndarray>\n",
-       "  * time        (time) object 29kB 0002-01-01 12:00:00 ... 0011-12-31 12:00:00\n",
-       "    time_bnds   (time, bnds) object 58kB dask.array<chunksize=(5, 2), meta=np.ndarray>\n",
-       "Data variables:\n",
-       "    mrso        (time, lat, lon) float32 757MB dask.array<chunksize=(5, 180, 288), meta=np.ndarray>\n",
-       "Attributes: (12/18)\n",
-       "    title:                            c96L65_am5f3b1r0_pdclim1850F\n",
-       "    grid_type:                        regular\n",
-       "    grid_tile:                        N/A\n",
-       "    code_release_version:             2023.01\n",
-       "    git_hash:                         unknown githash\n",
-       "    external_variables:               land_area\n",
-       "    ...                               ...\n",
-       "    intake_esm_attrs:variable_id:     mrso\n",
-       "    intake_esm_attrs:chunk_freq:      1yr\n",
-       "    intake_esm_attrs:platform:        gfdl.ncrc5-deploy-prod-openmp\n",
-       "    intake_esm_attrs:cell_methods:    ts\n",
-       "    intake_esm_attrs:_data_format_:   netcdf\n",
-       "    intake_esm_dataset_key:           am5.c96L65_am5f3b1r0_pdclim1850F.daily....
" - ], - "text/plain": [ - " Size: 757MB\n", - "Dimensions: (time: 3650, bnds: 2, lat: 180, lon: 288)\n", - "Coordinates:\n", - " average_DT (time) timedelta64[ns] 29kB dask.array\n", - " average_T1 (time) object 29kB dask.array\n", - " average_T2 (time) object 29kB dask.array\n", - " * bnds (bnds) float64 16B 1.0 2.0\n", - " * lat (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n", - " lat_bnds (lat, bnds) float64 3kB dask.array\n", - " * lon (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n", - " lon_bnds (lon, bnds) float64 5kB dask.array\n", - " * time (time) object 29kB 0002-01-01 12:00:00 ... 0011-12-31 12:00:00\n", - " time_bnds (time, bnds) object 58kB dask.array\n", - "Data variables:\n", - " mrso (time, lat, lon) float32 757MB dask.array\n", - "Attributes: (12/18)\n", - " title: c96L65_am5f3b1r0_pdclim1850F\n", - " grid_type: regular\n", - " grid_tile: N/A\n", - " code_release_version: 2023.01\n", - " git_hash: unknown githash\n", - " external_variables: land_area\n", - " ... ...\n", - " intake_esm_attrs:variable_id: mrso\n", - " intake_esm_attrs:chunk_freq: 1yr\n", - " intake_esm_attrs:platform: gfdl.ncrc5-deploy-prod-openmp\n", - " intake_esm_attrs:cell_methods: ts\n", - " intake_esm_attrs:_data_format_: netcdf\n", - " intake_esm_dataset_key: am5.c96L65_am5f3b1r0_pdclim1850F.daily...." - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "id": "84071a21-5f29-4554-99cb-7c02bda9d1f7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'mrso' (time: 3650, lat: 180, lon: 288)> Size: 757MB\n",
-       "dask.array<concatenate, shape=(3650, 180, 288), dtype=float32, chunksize=(5, 180, 288), chunktype=numpy.ndarray>\n",
-       "Coordinates:\n",
-       "    average_DT  (time) timedelta64[ns] 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T1  (time) object 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T2  (time) object 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "  * lat         (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n",
-       "  * lon         (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n",
-       "  * time        (time) object 29kB 0002-01-01 12:00:00 ... 0011-12-31 12:00:00\n",
-       "Attributes:\n",
-       "    units:            kg m-2\n",
-       "    long_name:        Total Soil Moisture Content\n",
-       "    cell_methods:     area: mean time: mean\n",
-       "    ocean_fillvalue:  0.0\n",
-       "    cell_measures:    area: land_area\n",
-       "    time_avg_info:    average_T1,average_T2,average_DT\n",
-       "    standard_name:    soil_moisture_content\n",
-       "    interp_method:    conserve_order1
" - ], - "text/plain": [ - " Size: 757MB\n", - "dask.array\n", - "Coordinates:\n", - " average_DT (time) timedelta64[ns] 29kB dask.array\n", - " average_T1 (time) object 29kB dask.array\n", - " average_T2 (time) object 29kB dask.array\n", - " * lat (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n", - " * lon (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n", - " * time (time) object 29kB 0002-01-01 12:00:00 ... 0011-12-31 12:00:00\n", - "Attributes:\n", - " units: kg m-2\n", - " long_name: Total Soil Moisture Content\n", - " cell_methods: area: mean time: mean\n", - " ocean_fillvalue: 0.0\n", - " cell_measures: area: land_area\n", - " time_avg_info: average_T1,average_T2,average_DT\n", - " standard_name: soil_moisture_content\n", - " interp_method: conserve_order1" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds[\"mrso\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "d8e8cd0c-5502-4564-bb12-a269781415ad", - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "mrso = ds.mrso.isel(time=1).plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "68b4a24c-0720-476b-8061-c42c84608e5d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "ds.mrso.mean(dim='time').plot()" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "9212d429-8cd2-4ef6-a498-2fed900091d9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 00020101-00021231\n", - "1 00030101-00031231\n", - "2 00040101-00041231\n", - "3 00050101-00051231\n", - "4 00060101-00061231\n", - "5 00070101-00071231\n", - "6 00080101-00081231\n", - "7 00090101-00091231\n", - "8 00110101-00111231\n", - "9 00100101-00101231\n", - "Name: temporal_subset, dtype: object" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cat.df['temporal_subset'] " - ] - }, - { - "cell_type": "markdown", - "id": "06746aff-889b-4c67-b2d7-fb5ae821a678", - "metadata": {}, - "source": [ - "Can I please leverage CF? " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d7dadd5-7abd-4bf7-a6ca-e39d3c214b04", - "metadata": {}, - "outputs": [], - "source": [ - "pip install cf_xarray" - ] - }, - { - "cell_type": "markdown", - "id": "3f248b8e-2d65-469c-b41f-f1875fac7317", - "metadata": {}, - "source": [ - "#You may leverage the use of cf_xarray, xMIP etc to build your analyses from here. They all blend in." - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "c47d02a6-c340-45f6-8f84-f26e691358ca", - "metadata": {}, - "outputs": [], - "source": [ - "import xarray as xr\n", - "import cf_xarray as cfxr" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "c6cb19f4-6409-4e32-9119-b0d51b42eb33", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.Dataset> Size: 757MB\n",
-       "Dimensions:     (time: 3650, bnds: 2, lat: 180, lon: 288)\n",
-       "Coordinates:\n",
-       "    average_DT  (time) timedelta64[ns] 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T1  (time) object 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T2  (time) object 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "  * bnds        (bnds) float64 16B 1.0 2.0\n",
-       "  * lat         (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n",
-       "    lat_bnds    (lat, bnds) float64 3kB dask.array<chunksize=(180, 2), meta=np.ndarray>\n",
-       "  * lon         (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n",
-       "    lon_bnds    (lon, bnds) float64 5kB dask.array<chunksize=(288, 2), meta=np.ndarray>\n",
-       "  * time        (time) object 29kB 0002-01-01 12:00:00 ... 0011-12-31 12:00:00\n",
-       "    time_bnds   (time, bnds) object 58kB dask.array<chunksize=(5, 2), meta=np.ndarray>\n",
-       "Data variables:\n",
-       "    mrso        (time, lat, lon) float32 757MB dask.array<chunksize=(5, 180, 288), meta=np.ndarray>\n",
-       "Attributes: (12/18)\n",
-       "    title:                            c96L65_am5f3b1r0_pdclim1850F\n",
-       "    grid_type:                        regular\n",
-       "    grid_tile:                        N/A\n",
-       "    code_release_version:             2023.01\n",
-       "    git_hash:                         unknown githash\n",
-       "    external_variables:               land_area\n",
-       "    ...                               ...\n",
-       "    intake_esm_attrs:variable_id:     mrso\n",
-       "    intake_esm_attrs:chunk_freq:      1yr\n",
-       "    intake_esm_attrs:platform:        gfdl.ncrc5-deploy-prod-openmp\n",
-       "    intake_esm_attrs:cell_methods:    ts\n",
-       "    intake_esm_attrs:_data_format_:   netcdf\n",
-       "    intake_esm_dataset_key:           am5.c96L65_am5f3b1r0_pdclim1850F.daily....
" - ], - "text/plain": [ - " Size: 757MB\n", - "Dimensions: (time: 3650, bnds: 2, lat: 180, lon: 288)\n", - "Coordinates:\n", - " average_DT (time) timedelta64[ns] 29kB dask.array\n", - " average_T1 (time) object 29kB dask.array\n", - " average_T2 (time) object 29kB dask.array\n", - " * bnds (bnds) float64 16B 1.0 2.0\n", - " * lat (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n", - " lat_bnds (lat, bnds) float64 3kB dask.array\n", - " * lon (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n", - " lon_bnds (lon, bnds) float64 5kB dask.array\n", - " * time (time) object 29kB 0002-01-01 12:00:00 ... 0011-12-31 12:00:00\n", - " time_bnds (time, bnds) object 58kB dask.array\n", - "Data variables:\n", - " mrso (time, lat, lon) float32 757MB dask.array\n", - "Attributes: (12/18)\n", - " title: c96L65_am5f3b1r0_pdclim1850F\n", - " grid_type: regular\n", - " grid_tile: N/A\n", - " code_release_version: 2023.01\n", - " git_hash: unknown githash\n", - " external_variables: land_area\n", - " ... ...\n", - " intake_esm_attrs:variable_id: mrso\n", - " intake_esm_attrs:chunk_freq: 1yr\n", - " intake_esm_attrs:platform: gfdl.ncrc5-deploy-prod-openmp\n", - " intake_esm_attrs:cell_methods: ts\n", - " intake_esm_attrs:_data_format_: netcdf\n", - " intake_esm_dataset_key: am5.c96L65_am5f3b1r0_pdclim1850F.daily...." - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "xr.decode_cf(ds)" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "0dc03c24-25b6-48f6-9c44-d8bb677244eb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'mrso' (time: 0, lat: 180, lon: 288)> Size: 0B\n",
-       "dask.array<getitem, shape=(0, 180, 288), dtype=float32, chunksize=(0, 180, 288), chunktype=numpy.ndarray>\n",
-       "Coordinates:\n",
-       "    average_DT  (time) float64 0B dask.array<chunksize=(0,), meta=np.ndarray>\n",
-       "    average_T1  (time) float64 0B dask.array<chunksize=(0,), meta=np.ndarray>\n",
-       "    average_T2  (time) float64 0B dask.array<chunksize=(0,), meta=np.ndarray>\n",
-       "  * lat         (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n",
-       "  * lon         (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n",
-       "  * time        (time) float64 0B \n",
-       "Attributes:\n",
-       "    units:            kg m-2\n",
-       "    long_name:        Total Soil Moisture Content\n",
-       "    cell_methods:     area: mean time: mean\n",
-       "    ocean_fillvalue:  0.0\n",
-       "    cell_measures:    area: land_area\n",
-       "    time_avg_info:    average_T1,average_T2,average_DT\n",
-       "    standard_name:    soil_moisture_content\n",
-       "    interp_method:    conserve_order1
" - ], - "text/plain": [ - " Size: 0B\n", - "dask.array\n", - "Coordinates:\n", - " average_DT (time) float64 0B dask.array\n", - " average_T1 (time) float64 0B dask.array\n", - " average_T2 (time) float64 0B dask.array\n", - " * lat (lat) float64 1kB -89.5 -88.5 -87.5 -86.5 ... 87.5 88.5 89.5\n", - " * lon (lon) float64 2kB 0.625 1.875 3.125 4.375 ... 356.9 358.1 359.4\n", - " * time (time) float64 0B \n", - "Attributes:\n", - " units: kg m-2\n", - " long_name: Total Soil Moisture Content\n", - " cell_methods: area: mean time: mean\n", - " ocean_fillvalue: 0.0\n", - " cell_measures: area: land_area\n", - " time_avg_info: average_T1,average_T2,average_DT\n", - " standard_name: soil_moisture_content\n", - " interp_method: conserve_order1" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.mrso.sel(time=slice(\"0002-01-01\",\"0004-01-01\"))" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "4f443874-7a2d-4856-b687-84a8f02a0f83", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
<xarray.DataArray 'time' (time: 3650)> Size: 29kB\n",
-       "array([ 365.5,  366.5,  367.5, ..., 4012.5, 4013.5, 4014.5])\n",
-       "Coordinates:\n",
-       "    average_DT  (time) float64 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T1  (time) float64 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "    average_T2  (time) float64 29kB dask.array<chunksize=(5,), meta=np.ndarray>\n",
-       "  * time        (time) float64 29kB 365.5 366.5 367.5 ... 4.014e+03 4.014e+03\n",
-       "Attributes:\n",
-       "    units:          days since 0001-01-01 00:00:00\n",
-       "    long_name:      time\n",
-       "    axis:           T\n",
-       "    calendar_type:  NOLEAP\n",
-       "    calendar:       noleap\n",
-       "    bounds:         time_bnds\n",
-       "    cell_methods:   time: mean
" - ], - "text/plain": [ - " Size: 29kB\n", - "array([ 365.5, 366.5, 367.5, ..., 4012.5, 4013.5, 4014.5])\n", - "Coordinates:\n", - " average_DT (time) float64 29kB dask.array\n", - " average_T1 (time) float64 29kB dask.array\n", - " average_T2 (time) float64 29kB dask.array\n", - " * time (time) float64 29kB 365.5 366.5 367.5 ... 4.014e+03 4.014e+03\n", - "Attributes:\n", - " units: days since 0001-01-01 00:00:00\n", - " long_name: time\n", - " axis: T\n", - " calendar_type: NOLEAP\n", - " calendar: noleap\n", - " bounds: time_bnds\n", - " cell_methods: time: mean" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.mrso.time" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a61e9c94-5d20-44d1-9a0a-6dab48dc444c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "intakebuilder", - "language": "python", - "name": "intakebuilder" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.2" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/doc/generation.rst b/doc/generation.rst index a90698f..c91d29c 100644 --- a/doc/generation.rst +++ b/doc/generation.rst @@ -126,28 +126,82 @@ From a Python script Do you have a python script or a notebook where you could also include steps to generate a data catalog? See example `here `_ -Here is another example +Here is another example *with a custom configuration*: .. 
code-block:: console - #!/usr/bin/env python + import sys, os + git_package_dir = '/home/a1r/git/forkCatalogBuilder-/' + sys.path.append(git_package_dir) - #TODO test after conda pkg is published and make changes as needed - from catalogbuilder.scripts import gen_intake_gfdl - import sys + import catalogbuilder + from catalogbuilder.scripts import gen_intake_gfdl + ######USER input begins######## - input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" - output_path = "test" - try: - gen_intake_gfdl.create_catalog(input_path,output_path) - except: - sys.exit("Exception occured calling gen_intake_gfdl.create_catalog") + #User provides the input directory for which a data catalog needs to be generated. + input_path = "/archive/John.Krasting/fre/FMS2024.02_OM5_20240724/CM4.5v01_om5b06_piC_noBLING/gfdl.ncrc5-intel23-prod-openmp/pp/" + #/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/" + + #USER inputs the output path. Based on the following setting, user can expect to see /home/a1r/tests/mycatalog-jpk-def.csv and /home/a1r/tests/mycatalog-jpk-def.json generated as output. + + output_path = "/home/a1r/tests/mycatalog-jpk-def" + #NOTE: If your input_path does not generally look like the one above, you will need to pass a custom configuration with --config + + #This is an example call to run catalog builder using a yaml config file. 
+ configyaml = os.path.join(git_package_dir, 'catalogbuilder/scripts/configs/config-example2.yml') + #input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" + #output_path = "sample-mdtf-catalog" + + def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml): + csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) + return(csv,json) + + if __name__ == '__main__': + create_catalog_from_config(input_path,output_path) #,configyaml) + +And an example *with a default configuration*: + +.. code-block:: console + + import sys, os + git_package_dir = '/home/a1r/git/forkCatalogBuilder-/' + sys.path.append(git_package_dir) + + import catalogbuilder + from catalogbuilder.scripts import gen_intake_gfdl + print(gen_intake_gfdl.__file__) + + ######USER input begins######## + + #User provides the input directory for which a data catalog needs to be generated. + + input_path = "/archive/a1r/fre/FMS2024.02_OM5_20240724/CM4.5v01_om5b06_piC_noBLING/gfdl.ncrc5-intel23-prod-openmp/pp/" + #/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/" + + #USER inputs the output path. Based on the following setting, user can expect to see /home/a1r/tests/static-catalog.csv and /home/a1r/tests/static-catalog.json generated as output. + + output_path = "/home/a1r/tests/static-catalog" + #NOTE: If your input_path does not generally look like the one above, you will need to pass a custom configuration with --config + ####END OF user input ########## + + #This example runs catalog builder with the default configuration; the yaml config path defined next is for reference only and is not passed to create_catalog. 
+ + configyaml = os.path.join(git_package_dir, 'configs/config-template.yaml') + #input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" + #output_path = "sample-mdtf-catalog" + + def create_catalog_from_config(input_path=input_path,output_path=output_path): #,configyaml=configyaml): + csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path)#,verbose=True,config=configyaml) + return(csv,json) + + if __name__ == '__main__': + csv,json = create_catalog_from_config(input_path,output_path)#,configyaml) + From Jupyter Notebook --------------------- -Refer to this `notebook `_ to see how you can generate catalogs from a Jupyter Notebook - +Refer to this `notebook `_ to see how you can generate catalogs from a Jupyter Notebook. .. image:: _static/catalog_generation.png :alt: Screenshot of a notebook showing catalog generation @@ -178,11 +232,23 @@ See `Flags`_ here. See `Fre-CLI Documentation here `_ -Flags +Arguments/Options _____ +**Input/output paths can be passed directly to the catalog builder tool on the command line** + +All methods of generating a catalog support passing the input and output paths directly. + +The input path must be the first argument and the output path must be the second. + +Ex. gen_intake_gfdl.py /archive/Some.User/input-path ./output_path + + .. Reference `Flags`_. +- --config - Allows catalogs to be generated with a custom configuration. Requires the path to a YAML configuration file. (Ex. "--config custom_config.yaml") - --overwrite - Overwrite an existing catalog at the given output path - --append - Append (without headerlist) to an existing catalog at the given output path -- --slow - Activates slow mode which retrieves standard_name `(or long_name) where possible. **"Standard_name" must be in your output_path_template** +- --slow - Activates slow mode which retrieves standard_name (or long_name) where possible. 
**"Standard_name" must be in your output_path_template** +- --i - Optional method for passing input path +- --o - Optional method for passing output path