generated from NOAA-GFDL/template-repository
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tmerlis additions to config, notebooks to build and use X-SHiELD data…
… catalog on stellar HPC
- Loading branch information
Timothy Merlis
committed
Nov 26, 2024
1 parent
038de4d
commit 3d21a4e
Showing
3 changed files
with
2,629 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
#what kind of directory structure to expect? | ||
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp | ||
# the output_path_template is set as follows. | ||
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we | ||
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example | ||
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure | ||
#this is a valid value in headerlist as well. | ||
#The fourth directory is am5f3b1r0, which does not map to an existing header value. So we simply put NA in output_path_template | ||
#for the fourth value. | ||
|
||
#catalog headers | ||
#The headerlist is the list of expected column names in your catalog/csv file. This is usually determined by the users in conjunction | ||
#with the ESM collection specification standards and the appropriate workflows. | ||
|
||
# default, failed because standard_name not compatible with X-SHiELD output | ||
# may be okay with updated diag table that uses cmip variable names | ||
#headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", | ||
# "frequency", "realm", "table_id", | ||
# "member_id", "grid_label", "variable_id", | ||
# "time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"] | ||
|
||
# working prototype v1 | ||
#headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", | ||
# "frequency", "realm", "table_id", | ||
# "member_id", "grid_label", "variable_id", | ||
# "time_range", "chunk_freq","platform","dimensions","cell_methods","path"] | ||
|
||
# eliminated some unused headers, but | ||
# eliminating frequency, realm, table_id, chunk_freq, causes a problem | ||
# when col.search is called | ||
headerlist: ["source_id", "experiment_id", | ||
"frequency", "realm", "table_id", | ||
"member_id", "grid_label", "variable_id", | ||
"time_range", "chunk_freq","platform","path"] | ||
|
||
#headerlist: ["source_id","platform","activity_id", "experiment_id", | ||
# "frequency", "realm", "table_id", | ||
# "member_id", "grid_label", "variable_id", | ||
# "time_range", "chunk_freq","platform","dimensions","cell_methods","path"] | ||
|
||
#output_path_template: ['NA','NA','source_id','platform','activity_id','experiment_id','custom_pp','time_range'] | ||
|
||
#what kind of directory structure to expect? | ||
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp | ||
# the output_path_template is set as follows. | ||
#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we | ||
#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example | ||
#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure | ||
#this is a valid value in headerlist as well. | ||
#The fourth directory is am5f3b1r0, which does not map to an existing header value. So we simply put NA in output_path_template | ||
#for the fourth value. | ||
|
||
output_path_template: ['NA','NA','NA','source_id','platform','member_id','experiment_id','custom_pp','time_range'] | ||
|
||
output_file_template: ['variable_id','NA'] | ||
|
||
#OUTPUT FILE INFO is currently passed as command-line argument. | ||
#We will revisit adding a csvfile, jsonfile and logfile configuration to the builder configuration file in the future. | ||
#csvfile = #jsonfile = #logfile = | ||
|
||
####################################################### | ||
|
||
# note: this input path is built from softlinks to the directory /scratch/cimes/GLOBALFV3/stellar_run/ | ||
# cp -as /scratch/cimes/GLOBALFV3/stellar_run/processed/ /scratch/cimes/tmerlis/GLOBALFV3/stellar_run/processed | ||
# cp -as /scratch/cimes/GLOBALFV3/stellar_run/processed_new/ /scratch/cimes/tmerlis/GLOBALFV3/stellar_run/processed_new | ||
# this avoids the other directories that contain other experiments that have not been 'processed' | ||
input_path: "/scratch/cimes/tmerlis/GLOBALFV3/stellar_run/" | ||
output_path: "/home/tmerlis/hackathon/catbuild/pire4" # ENTER THE NAME OF THE CSV AND JSON, BASE NAME ONLY (NO FILE EXTENSION), e.g. catalog (the builder then generates catalog.csv and catalog.json). This can also be an absolute path. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "2a5a9746-b56f-4462-b938-ae17f7129a8d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import catalogbuilder\n", | ||
"from catalogbuilder.scripts import gen_intake_gfdl\n", | ||
"import sys, os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"id": "1b0f5306-0032-4e0a-8c4a-b5bcb1d2ab7f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#This is an example call to run catalog builder using a yaml config file.\n", | ||
"\n", | ||
"configyaml = '/home/tmerlis/hackathon/catbuild/config-xshield_stellar.yaml' \n", | ||
"# soft link to processed and processed_new **only** from within '/scratch/cimes/GLOBALFV3/stellar_run/' \n", | ||
"input_path = '/scratch/cimes/tmerlis/GLOBALFV3/stellar_run/' \n", | ||
"output_path = '/home/tmerlis/hackathon/catbuild/xshield_cat' " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "e8d89aec-655e-4729-820b-a37ffb60454e", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"INFO:local:[Mostly] silent log activated\n", | ||
"INFO:local:Default schema: catalogbuilder/cats/gfdl_template.json\n", | ||
"INFO:local:input path: /scratch/cimes/tmerlis/GLOBALFV3/stellar_run/\n", | ||
"INFO:local: output path: /home/tmerlis/hackathon/catbuild/xshield_cat\n", | ||
"JSON generated at: /home/tmerlis/hackathon/catbuild/xshield_cat.json\n", | ||
"CSV generated at: /home/tmerlis/hackathon/catbuild/xshield_cat.csv\n", | ||
"INFO:local:CSV generated at/home/tmerlis/hackathon/catbuild/xshield_cat.csv\n", | ||
"CPU times: user 4.76 s, sys: 12.6 s, total: 17.3 s\n", | ||
"Wall time: 2min 5s\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%%time\n", | ||
"def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml):\n", | ||
" csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml)\n", | ||
" return(csv,json)\n", | ||
"\n", | ||
"if __name__ == '__main__':\n", | ||
" create_catalog_from_config(input_path,output_path,configyaml)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "69de4f3a-c622-444f-9bdf-cb3459d3261c", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"No traceback available to show.\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%tb" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "catalogbuilder [~/.conda/envs/catalogbuilder/]", | ||
"language": "python", | ||
"name": "conda_catalogbuilder" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.13.0" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Oops, something went wrong.